From 0ed23defed4e13a8485669279718de3f33f90ceb Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Wed, 15 Oct 2025 22:08:58 -0700 Subject: [PATCH 01/99] PPL tostring() implementation issue #4492 Signed-off-by: Asif Bashar --- .../sql/calcite/utils/PPLOperandTypes.java | 7 + .../function/PPLBuiltinOperators.java | 2 + .../expression/function/PPLFuncImpTable.java | 3 + .../function/udf/ToStringFunction.java | 162 +++++++++++++++ .../function/udf/ToStringFunctionTest.java | 149 ++++++++++++++ docs/user/ppl/functions/conversion.rst | 92 +++++++++ docs/user/ppl/functions/string.rst | 3 + ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 +- .../sql/ppl/parser/AstExpressionBuilder.java | 11 +- .../calcite/CalcitePPLStringFunctionTest.java | 185 ++++++++++++++++++ sql/src/main/antlr/OpenSearchSQLLexer.g4 | 1 + sql/src/main/antlr/OpenSearchSQLParser.g4 | 1 + 13 files changed, 621 insertions(+), 2 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java create mode 100644 core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index 12a9a297542..69e1492538c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -112,6 +112,13 @@ private PPLOperandTypes() {} SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)); + public static final UDFOperandMetadata BOOLEAN_OR_NUMERIC_STRING_OR_STRING_STRING = + UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + OperandTypes.family(SqlTypeFamily.BOOLEAN) + .or(OperandTypes.family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)) + .or(OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + public static final UDFOperandMetadata NUMERIC_NUMERIC_OPTIONAL_NUMERIC = UDFOperandMetadata.wrap( (CompositeOperandTypeChecker) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 76f625ebd38..984f398b1ee 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -66,6 +66,7 @@ import org.opensearch.sql.expression.function.udf.RexExtractMultiFunction; import org.opensearch.sql.expression.function.udf.RexOffsetFunction; import org.opensearch.sql.expression.function.udf.SpanFunction; +import org.opensearch.sql.expression.function.udf.ToStringFunction; import org.opensearch.sql.expression.function.udf.condition.EarliestFunction; import org.opensearch.sql.expression.function.udf.condition.EnhancedCoalesceFunction; import org.opensearch.sql.expression.function.udf.condition.LatestFunction; @@ -411,6 +412,7 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("multi_match", false); public static final SqlOperator NUMBER_TO_STRING = new NumberToStringFunction().toUDF("NUMBER_TO_STRING"); + public static final SqlOperator TOSTRING = new ToStringFunction().toUDF("TOSTRING"); public static final SqlOperator WIDTH_BUCKET = new org.opensearch.sql.expression.function.udf.binning.WidthBucketFunction() .toUDF("WIDTH_BUCKET"); diff --git 
a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 8297ecf73ce..de89df383d2 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -210,6 +210,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPDIFF; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIME_FORMAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIME_TO_SEC; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TOSTRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TO_DAYS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TO_SECONDS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRANSFORM; @@ -887,6 +888,7 @@ void populate() { registerOperator(WEEKOFYEAR, PPLBuiltinOperators.WEEK); registerOperator(INTERNAL_PATTERN_PARSER, PPLBuiltinOperators.PATTERN_PARSER); + registerOperator(TOSTRING, PPLBuiltinOperators.TOSTRING); // Register MVJOIN to use Calcite's ARRAY_JOIN register( @@ -1058,6 +1060,7 @@ void populate() { SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)), false)); + register( LOG, (FunctionImp2) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java new file mode 100644 index 00000000000..70d2a82b13c --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java @@ -0,0 +1,162 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.text.NumberFormat; +import java.time.Duration; +import java.util.List; +import java.util.Locale; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.function.Strict; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; +import org.opensearch.sql.calcite.utils.PPLReturnTypes; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * A custom implementation of number/boolean to string . + * + *

This operator is necessary because tostring supports named output formats:
+ * "binary" converts a number to its binary representation, "hex" converts the number to a
+ * hexadecimal value, "commas" formats the number with commas (rounding any decimal part to two
+ * decimal places), and "duration" converts a value in seconds to the readable time format
+ * HH:MM:SS. If no format parameter is provided, the value is treated as a boolean.
+ */
+public class ToStringFunction extends ImplementorUDF {
+  public ToStringFunction() {
+    super(
+        new org.opensearch.sql.expression.function.udf.ToStringFunction.ToStringImplementor(),
+        NullPolicy.ANY);
+  }
+
+  public static final String DURATION_FORMAT = "duration";
+  public static final String HEX_FORMAT = "hex";
+  public static final String COMMAS_FORMAT = "commas";
+  public static final String BINARY_FORMAT = "binary";
+
+  @Override
+  public SqlReturnTypeInference getReturnTypeInference() {
+    return PPLReturnTypes.STRING_FORCE_NULLABLE;
+  }
+
+  @Override
+  public UDFOperandMetadata getOperandMetadata() {
+    return PPLOperandTypes.BOOLEAN_OR_NUMERIC_STRING_OR_STRING_STRING;
+  }
+
+  public static class ToStringImplementor implements NotNullImplementor {
+
+    @Override
+    public Expression implement(
+        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
+      Expression fieldValue = translatedOperands.get(0);
+      if (translatedOperands.size() > 1) {
+        // Two-argument form: dispatch to a toString(value, format) overload.
+        Expression format = translatedOperands.get(1);
+        return Expressions.call(ToStringFunction.class, "toString", fieldValue, format);
+      } else {
+        // Single-argument form: the value is interpreted as a boolean.
+        return Expressions.call(ToStringFunction.class, "toString", fieldValue);
+      }
+    }
+  }
+
+  @Strict
+  public static String toString(boolean fieldValue) {
+    if (fieldValue) {
+      return "True";
+    } else {
+      return "False";
+    }
+  }
+
+  @Strict
+  public static String toString(String fieldValue) {
+    return toString(Boolean.parseBoolean(fieldValue));
+  }
+
+  @Strict
+  public static String toString(BigDecimal num, String format) {
+    if (format.equals(DURATION_FORMAT)) {
+      Duration d = Duration.ofSeconds(num.toBigInteger().longValue());
+      long hours = d.toHours();
+      int minutes = d.toMinutesPart();
+      int remainingSeconds = d.toSecondsPart();
+
+      String time_str = String.format("%02d:%02d:%02d", hours, minutes, remainingSeconds);
+      return time_str;
+    } else if (format.equals(HEX_FORMAT)) {
+      return num.toBigInteger().toString(16);
+    } else if (format.equals(COMMAS_FORMAT)) {
+      NumberFormat nf = NumberFormat.getNumberInstance(Locale.getDefault());
+      nf.setMinimumFractionDigits(0);
+      nf.setMaximumFractionDigits(2);
+      return nf.format(num);
+    } else if (format.equals(BINARY_FORMAT)) {
+      BigInteger integerPart = num.toBigInteger();
+      return integerPart.toString(2);
+    }
+    return num.toString();
+  }
+
+  @Strict
+  public static String toString(double num, String format) {
+    if (format.equals(DURATION_FORMAT)) {
+      Duration d = Duration.ofSeconds(Math.round(num));
+      long hours = d.toHours();
+      int minutes = d.toMinutesPart();
+      int remainingSeconds = d.toSecondsPart();
+      String time_str = String.format("%02d:%02d:%02d", hours, minutes, remainingSeconds);
+      return time_str;
+    } else if (format.equals(HEX_FORMAT)) {
+      return Double.toHexString(num);
+    } else if (format.equals(COMMAS_FORMAT)) {
+      NumberFormat nf = NumberFormat.getNumberInstance(Locale.getDefault());
+      return nf.format(num);
+    } else if (format.equals(BINARY_FORMAT)) {
+      return Long.toBinaryString(Double.doubleToLongBits(num));
+    }
+    return Double.toString(num);
+  }
+
+  @Strict
+  
public static String toString(int num, String format) { + + if (format.equals(DURATION_FORMAT)) { + + int hours = num / 3600; + int minutes = (num % 3600) / 60; + int seconds = num % 60; + + String time_str = String.format("%02d:%02d:%02d", hours, minutes, seconds); + return time_str; + } else if (format.equals(HEX_FORMAT)) { + return Integer.toHexString(num); + } else if (format.equals(COMMAS_FORMAT)) { + NumberFormat nf = NumberFormat.getNumberInstance(Locale.getDefault()); + return nf.format(num); + } else if (format.equals(BINARY_FORMAT)) { + return Integer.toBinaryString(num); + } + return Integer.toString(num); + } + + @Strict + public static String toString(String str, String format) { + if (str.contains(".") || (str.length() > 10)) { + return toString(Double.parseDouble(str), format); + } else { + return toString(Integer.parseInt(str), format); + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java new file mode 100644 index 00000000000..cfd25796b23 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java @@ -0,0 +1,149 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import org.junit.jupiter.api.Test; + +import java.math.BigDecimal; +import java.util.Locale; + +import static org.junit.jupiter.api.Assertions.*; + +public class ToStringFunctionTest { + + private final ToStringFunction function = new ToStringFunction(); + + @Test + void testBooleanToString() { + assertEquals("True", ToStringFunction.toString(true)); + assertEquals("False", ToStringFunction.toString(false)); + } + + @Test + void testStringBooleanToString() { + assertEquals("True", ToStringFunction.toString("true")); + assertEquals("False", ToStringFunction.toString("false")); + assertEquals("False", ToStringFunction.toString("anythingElse")); + } + + @Test + void testBigDecimalToStringDurationFormat() { + BigDecimal num = new BigDecimal("3661"); // 1 hour 1 minute 1 second + String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); + assertEquals("01:01:01", result); + } + + @Test + void testBigDecimalToStringHexFormat() { + BigDecimal num = new BigDecimal("255"); + String result = ToStringFunction.toString(num, ToStringFunction.HEX_FORMAT); + assertEquals("ff", result); + } + + @Test + void testBigDecimalToStringCommasFormat() { + Locale.setDefault(Locale.US); // Ensure predictable comma placement + BigDecimal num = new BigDecimal("1234567.891"); + String result = ToStringFunction.toString(num, ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains(",")); + } + + @Test + void testBigDecimalToStringBinaryFormat() { + BigDecimal num = new BigDecimal("10"); + String result = ToStringFunction.toString(num, ToStringFunction.BINARY_FORMAT); + assertEquals("1010", result); + } + + @Test + void testBigDecimalToStringDefault() { + BigDecimal num = new BigDecimal("123.45"); + assertEquals("123.45", ToStringFunction.toString(num, "unknown")); + } + + @Test + void testDoubleToStringDurationFormat() { + double num = 3661.4; + String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); + assertEquals("01:01:01", result); + } + + @Test + void testDoubleToStringHexFormat() { + double num = 10.5; + String result = ToStringFunction.toString(num, ToStringFunction.HEX_FORMAT); + 
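+    // Double.toHexString renders IEEE 754 hexadecimal notation, e.g. Double.toHexString(10.5) is "0x1.5p3".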
assertTrue(result.startsWith("0x")); + } + + @Test + void testDoubleToStringCommasFormat() { + Locale.setDefault(Locale.US); + double num = 12345.678; + String result = ToStringFunction.toString(num, ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains(",")); + } + + @Test + void testDoubleToStringBinaryFormat() { + double num = 10.0; + String result = ToStringFunction.toString(num, ToStringFunction.BINARY_FORMAT); + assertNotNull(result); + assertFalse(result.isEmpty()); + } + + @Test + void testDoubleToStringDefault() { + assertEquals("10.5", ToStringFunction.toString(10.5, "unknown")); + } + + @Test + void testIntToStringDurationFormat() { + int num = 3661; + String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); + assertEquals("01:01:01", result); + } + + @Test + void testIntToStringHexFormat() { + assertEquals("ff", ToStringFunction.toString(255, ToStringFunction.HEX_FORMAT)); + } + + @Test + void testIntToStringCommasFormat() { + Locale.setDefault(Locale.US); + String result = ToStringFunction.toString(1234567, ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains(",")); + } + + @Test + void testIntToStringBinaryFormat() { + assertEquals("1010", ToStringFunction.toString(10, ToStringFunction.BINARY_FORMAT)); + } + + @Test + void testIntToStringDefault() { + assertEquals("123", ToStringFunction.toString(123, "unknown")); + } + + @Test + void testStringNumericToStringIntFormat() { + String result = ToStringFunction.toString("42", ToStringFunction.HEX_FORMAT); + assertEquals("2a", result); + } + + @Test + void testStringNumericToStringDoubleFormat() { + String result = ToStringFunction.toString("42.5", ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains("42")); + } + + @Test + void testStringLargeNumberAsDouble() { + String largeNum = "1234567890123"; + String result = ToStringFunction.toString(largeNum, ToStringFunction.BINARY_FORMAT); + assertNotNull(result); + } +} diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index dbe4403540c..21124c27edc 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -78,3 +78,95 @@ Cast function can be chained:: |-------| | True | +-------+ + +TOSTRING +----- + +Description +>>>>>>>>>>> +There are two available usage based on paraemter types and number of parameters. +Usage with format type: tostring(number|string, string) converts the number in first argument to provided format type string in second argument. + Return type: string +Usage for boolean parameter without format type: tostring(boolean) converts the string to 'True' or 'False'. + Return type: string + +You can use this function with the eval commands and as part of eval expressions. +The first argument can be a number, number as string or boolean. +If first argument is a a number or number as string , second argument need to be format name. +If first argument is boolean, then second argument is not needed. + +format types: +a) "binary" Converts a number to a binary value. +b) "hex" Converts the number to a hexadecimal value. +c) "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to nearest two decimal places. +d) "duration" Converts the value in seconds to the readable time format HH:MM:SS. +The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats. 
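
As a reference for the rounding rule in the "commas" format, the snippet below mirrors the java.text.NumberFormat logic used by the implementation's BigDecimal path; it is a minimal standalone sketch (the class name is illustrative only, and a US locale is assumed for predictable separators)::

    import java.math.BigDecimal;
    import java.text.NumberFormat;
    import java.util.Locale;

    public class CommasRoundingSketch {
      public static void main(String[] args) {
        // Group digits with commas and round any decimal part to at most two places.
        NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);
        nf.setMinimumFractionDigits(0);
        nf.setMaximumFractionDigits(2);
        System.out.println(nf.format(new BigDecimal("12345.6789"))); // prints 12,345.68
      }
    }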
+ +Binary conversion +You can use this function to convert a number to a string of its binary representation. For example, the result of the following function is 1001, because the binary representation of 9 is 1001.: +eval result = tostring(9, "binary") + +For information about bitwise functions that you can use with the tostring function, see Bitwise functions. + +Basic examples +The following example returns "True 0xF 12,345.68". +... | eval n=tostring(1==1) + " " + tostring(15, "hex") + " " + tostring(12345.6789, "commas") +The following example returns foo=615 and foo2=00:10:15. The 615 seconds is converted into minutes and seconds. + +... | eval foo=615 | eval foo2 = tostring(foo, "duration") +The following example formats the column totalSales to display values with a currency symbol and commas. You must use a period between the currency value and the tostring function. + +Example:: + + os> source=people | eval `boolean_str` = tostring(1=1)| fields `boolean_str` + fetched rows / total rows = 1/1 + +---------------------+ + | boolean_str | + |---------------------+ + | True | + +---------------------+ + os> source=EMP | eval salary_binary = tostring(SAL, "binary") | fields ENAME, salary_binary, SAL" + fetched rows / total rows = 1/1 + +---------------+------------------+------------+ + | ENAME | salary_binary | SAL | + |---------------+------------------+------------+ + | SMITH | 1001110001000000 | 80000.00 | + +---------------+------------------+------------+ + os> source=EMP | eval salary_hex = tostring(SAL, "hex") | fields ENAME, salary_hex, SAL" + fetched rows / total rows = 1/1 + +---------------+------------------+------------+ + | ENAME | salary_hex | SAL | + |---------------+------------------+------------+ + | SMITH | 13880 | 80000.00 | + +---------------+---------------+---------------+ + + os> source=EMP | eval salary_commas = tostring(SAL, "commas") | fields ENAME, salary_commas, SAL" + fetched rows / total rows = 1/1 + +---------------+------------------+------------+ + | ENAME | salary_commas | SAL | + |---------------+------------------+------------+ + | SMITH | 80,000 | 80000.00 | + +---------------+------------------+------------+ + + + duration + + os> source=EMP | eval duration = tostring(6500, "duration") | fields ENAME, duration" + fetched rows / total rows = 1/1 + +---------------+-------------+ + | ENAME | duration | + |---------------+-------------+ + | SMITH | 01:48:20 | + +---------------+-------------+ + +Usage for boolean parameter without format type:: + +Example:: + + os> source=people | eval `boolean_str` = tostring(1=1)| fields `boolean_str` + fetched rows / total rows = 1/1 + +---------------------+ + | boolean_str | + |---------------------+ + | True | + +---------------------+ \ No newline at end of file diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index 24efa1434f5..43efed3c470 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -397,3 +397,6 @@ Example:: |---------------------+---------------------| | HELLOWORLD | HELLOWORLD | +---------------------+---------------------+ + + + diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index ba1e4960bb2..e528bb553ab 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -392,6 +392,7 @@ STRFTIME: 'STRFTIME'; // TEXT FUNCTIONS SUBSTR: 'SUBSTR'; SUBSTRING: 'SUBSTRING'; +TOSTRING: 'TOSTRING'; LTRIM: 'LTRIM'; RTRIM: 'RTRIM'; TRIM: 
'TRIM'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index e13447b68e9..852c55c863c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -847,11 +847,14 @@ evalFunctionCall : evalFunctionName LT_PRTHS functionArgs RT_PRTHS ; -// cast function + +// cast, tostring function dataTypeFunctionCall : CAST LT_PRTHS logicalExpression AS convertedDataType RT_PRTHS + | TOSTRING LT_PRTHS functionArgs RT_PRTHS ; + convertedDataType : typeName = DATE | typeName = TIME @@ -1434,6 +1437,7 @@ searchableKeyWord | USING | VALUE | CAST + | TOSTRING | GET_FORMAT | EXTRACT | INTERVAL diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index f037376f5c2..1460a3efc9a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -414,7 +414,16 @@ private Function buildFunction( /** Cast function. */ @Override public UnresolvedExpression visitDataTypeFunctionCall(DataTypeFunctionCallContext ctx) { - return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType())); + ParseTree rootNode = ctx.getChild(0); + String functionName = rootNode.getText(); + final String mappedName = + FUNCTION_NAME_MAPPING.getOrDefault(functionName.toLowerCase(Locale.ROOT), functionName); + + if (mappedName.equals("cast")) { + return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType())); + } else { + return buildFunction(mappedName, ctx.functionArgs().functionArg()); + } } @Override diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java index 1e97052dea0..d41b2c22453 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java @@ -46,6 +46,191 @@ public void testLower() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testToStringBoolean() { + String ppl = "source=EMP | eval boolean_value = tostring(1==1) | fields boolean_value |head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(boolean_value=[TOSTRING(true)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = "boolean_value=True\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TOSTRING`(TRUE) `boolean_value`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testToStringBin() { + String ppl = + "source=EMP | eval salary_binary = tostring(SAL, \"binary\") | fields ENAME," + + " salary_binary, SAL"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(ENAME=[$1], salary_binary=[TOSTRING($5, 'binary':VARCHAR)], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = + "ENAME=SMITH; salary_binary=1100100000; SAL=800.00\n" + + "ENAME=ALLEN; salary_binary=11001000000; SAL=1600.00\n" + + "ENAME=WARD; salary_binary=10011100010; SAL=1250.00\n" + + "ENAME=JONES; salary_binary=101110011111; SAL=2975.00\n" + + "ENAME=MARTIN; salary_binary=10011100010; SAL=1250.00\n" + + "ENAME=BLAKE; 
salary_binary=101100100010; SAL=2850.00\n" + + "ENAME=CLARK; salary_binary=100110010010; SAL=2450.00\n" + + "ENAME=SCOTT; salary_binary=101110111000; SAL=3000.00\n" + + "ENAME=KING; salary_binary=1001110001000; SAL=5000.00\n" + + "ENAME=TURNER; salary_binary=10111011100; SAL=1500.00\n" + + "ENAME=ADAMS; salary_binary=10001001100; SAL=1100.00\n" + + "ENAME=JAMES; salary_binary=1110110110; SAL=950.00\n" + + "ENAME=FORD; salary_binary=101110111000; SAL=3000.00\n" + + "ENAME=MILLER; salary_binary=10100010100; SAL=1300.00\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `ENAME`, `TOSTRING`(`SAL`, 'binary') `salary_binary`, `SAL`\nFROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testToStringHex() { + String ppl = + "source=EMP | eval salary_hex = tostring(SAL, \"hex\") | fields ENAME, salary_hex, SAL"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(ENAME=[$1], salary_hex=[TOSTRING($5, 'hex':VARCHAR)], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = + "ENAME=SMITH; salary_hex=320; SAL=800.00\n" + + "ENAME=ALLEN; salary_hex=640; SAL=1600.00\n" + + "ENAME=WARD; salary_hex=4e2; SAL=1250.00\n" + + "ENAME=JONES; salary_hex=b9f; SAL=2975.00\n" + + "ENAME=MARTIN; salary_hex=4e2; SAL=1250.00\n" + + "ENAME=BLAKE; salary_hex=b22; SAL=2850.00\n" + + "ENAME=CLARK; salary_hex=992; SAL=2450.00\n" + + "ENAME=SCOTT; salary_hex=bb8; SAL=3000.00\n" + + "ENAME=KING; salary_hex=1388; SAL=5000.00\n" + + "ENAME=TURNER; salary_hex=5dc; SAL=1500.00\n" + + "ENAME=ADAMS; salary_hex=44c; SAL=1100.00\n" + + "ENAME=JAMES; salary_hex=3b6; SAL=950.00\n" + + "ENAME=FORD; salary_hex=bb8; SAL=3000.00\n" + + "ENAME=MILLER; salary_hex=514; SAL=1300.00\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `ENAME`, `TOSTRING`(`SAL`, 'hex') `salary_hex`, `SAL`\nFROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testToStringHexFromNumberAsString() { + String ppl = + "source=EMP | eval salary_hex = tostring(\"1600\", \"hex\") | fields ENAME, salary_hex| head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n LogicalProject(ENAME=[$1], salary_hex=[TOSTRING('1600':VARCHAR, 'hex':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = + "ENAME=SMITH; salary_hex=640\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `ENAME`, `TOSTRING`('1600', 'hex') `salary_hex`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testToStringCommaFromNumberAsString() { + String ppl = + "source=EMP | eval salary_comma = tostring(\"160040222\", \"commas\") | fields ENAME, salary_comma| head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n LogicalProject(ENAME=[$1], salary_comma=[TOSTRING('160040222':VARCHAR, 'commas':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = + "ENAME=SMITH; salary_comma=160,040,222\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `ENAME`, `TOSTRING`('160040222', 'commas') `salary_comma`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test + public void 
testToStringBinaryFromNumberAsString() { + String ppl = + "source=EMP | eval salary_binary = tostring(\"160040222\", \"binary\") | fields ENAME, salary_binary| head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n LogicalProject(ENAME=[$1], salary_binary=[TOSTRING('160040222':VARCHAR, 'binary':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = + "ENAME=SMITH; salary_binary=1001100010100000010100011110\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `ENAME`, `TOSTRING`('160040222', 'binary') `salary_binary`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test + public void testToStringCommas() { + String ppl = + "source=EMP | eval salary_commas = tostring(SAL, \"commas\") | fields ENAME," + + " salary_commas, SAL"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(ENAME=[$1], salary_commas=[TOSTRING($5, 'commas':VARCHAR)], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = + "ENAME=SMITH; salary_commas=800; SAL=800.00\n" + + "ENAME=ALLEN; salary_commas=1,600; SAL=1600.00\n" + + "ENAME=WARD; salary_commas=1,250; SAL=1250.00\n" + + "ENAME=JONES; salary_commas=2,975; SAL=2975.00\n" + + "ENAME=MARTIN; salary_commas=1,250; SAL=1250.00\n" + + "ENAME=BLAKE; salary_commas=2,850; SAL=2850.00\n" + + "ENAME=CLARK; salary_commas=2,450; SAL=2450.00\n" + + "ENAME=SCOTT; salary_commas=3,000; SAL=3000.00\n" + + "ENAME=KING; salary_commas=5,000; SAL=5000.00\n" + + "ENAME=TURNER; salary_commas=1,500; SAL=1500.00\n" + + "ENAME=ADAMS; salary_commas=1,100; SAL=1100.00\n" + + "ENAME=JAMES; salary_commas=950; SAL=950.00\n" + + "ENAME=FORD; salary_commas=3,000; SAL=3000.00\n" + + "ENAME=MILLER; salary_commas=1,300; SAL=1300.00\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `ENAME`, `TOSTRING`(`SAL`, 'commas') `salary_commas`, `SAL`\nFROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testToStringDuration() { + String ppl = + "source=EMP | eval duration_commas = tostring(6500, \"duration\") | fields ENAME," + + " duration_commas|HEAD 1"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(ENAME=[$1], duration_commas=[TOSTRING(6500, 'duration':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = "ENAME=SMITH; duration_commas=01:48:20\n"; + verifyLogical(root, expectedLogical); + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `ENAME`, `TOSTRING`(6500, 'duration') `duration_commas`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testLike() { String ppl = "source=EMP | where like(JOB, 'SALE%') | stats count() as cnt"; diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index ba7c5be85ab..6465c692da3 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -133,6 +133,7 @@ STDDEV_SAMP: 'STDDEV_SAMP'; SUBSTRING: 'SUBSTRING'; TRIM: 'TRIM'; +TOSTRING: 'TOSTRING'; // Keywords, but can be ID // Common Keywords, but can be ID diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 5f7361160b3..fbaef12fb98 100644 --- 
a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -417,6 +417,7 @@ specificFunction : CASE expression caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall | CASE caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall | CAST '(' expression AS convertedDataType ')' # dataTypeFunctionCall + | TOSTRING '(' functionArg ')' # dataTypeFunctionCall ; relevanceFunction From 5a5b778cdf83230ff8a68a259a628e3ee0b3f318 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 16 Oct 2025 09:33:26 -0700 Subject: [PATCH 02/99] removed sql changes Signed-off-by: Asif Bashar --- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 1 - sql/src/main/antlr/OpenSearchSQLParser.g4 | 1 - 2 files changed, 2 deletions(-) diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 6465c692da3..ba7c5be85ab 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -133,7 +133,6 @@ STDDEV_SAMP: 'STDDEV_SAMP'; SUBSTRING: 'SUBSTRING'; TRIM: 'TRIM'; -TOSTRING: 'TOSTRING'; // Keywords, but can be ID // Common Keywords, but can be ID diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index fbaef12fb98..5f7361160b3 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -417,7 +417,6 @@ specificFunction : CASE expression caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall | CASE caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall | CAST '(' expression AS convertedDataType ')' # dataTypeFunctionCall - | TOSTRING '(' functionArg ')' # dataTypeFunctionCall ; relevanceFunction From 9d71a95f8487a7c5fa0e4cea8d11b4a3893efcaf Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 16 Oct 2025 09:50:53 -0700 Subject: [PATCH 03/99] doc changes Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 4 ++-- docs/user/ppl/functions/string.rst | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 21124c27edc..33af0075431 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -95,14 +95,14 @@ The first argument can be a number, number as string or boolean. If first argument is a a number or number as string , second argument need to be format name. If first argument is boolean, then second argument is not needed. -format types: +Format types: a) "binary" Converts a number to a binary value. b) "hex" Converts the number to a hexadecimal value. c) "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to nearest two decimal places. d) "duration" Converts the value in seconds to the readable time format HH:MM:SS. The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats. -Binary conversion +Binary conversion: You can use this function to convert a number to a string of its binary representation. 
For example, the result of the following function is 1001, because the binary representation of 9 is 1001.: eval result = tostring(9, "binary") diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index 43efed3c470..c1dd52a5d89 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -397,6 +397,4 @@ Example:: |---------------------+---------------------| | HELLOWORLD | HELLOWORLD | +---------------------+---------------------+ - - - + From be2c2e264a8cc337940f3f1ae2152a15672ce2cb Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 16 Oct 2025 09:52:09 -0700 Subject: [PATCH 04/99] docs changes Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 12 ++++++------ docs/user/ppl/functions/string.rst | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 33af0075431..9d0bf6eab1b 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -85,15 +85,15 @@ TOSTRING Description >>>>>>>>>>> There are two available usage based on paraemter types and number of parameters. -Usage with format type: tostring(number|string, string) converts the number in first argument to provided format type string in second argument. - Return type: string +Usage with format type: tostring(ANY, [format]) converts the number in first argument to provided format type string in second argument. + Return type: string Usage for boolean parameter without format type: tostring(boolean) converts the string to 'True' or 'False'. - Return type: string + Return type: string You can use this function with the eval commands and as part of eval expressions. The first argument can be a number, number as string or boolean. -If first argument is a a number or number as string , second argument need to be format name. -If first argument is boolean, then second argument is not needed. +If first argument can be any valid type , second argument is optional and if provided , it needs to be format name. +If first argument is boolean, then second argument is not used even if its provided. Format types: a) "binary" Converts a number to a binary value. @@ -108,7 +108,7 @@ eval result = tostring(9, "binary") For information about bitwise functions that you can use with the tostring function, see Bitwise functions. -Basic examples +Basic examples: The following example returns "True 0xF 12,345.68". ... | eval n=tostring(1==1) + " " + tostring(15, "hex") + " " + tostring(12345.6789, "commas") The following example returns foo=615 and foo2=00:10:15. The 615 seconds is converted into minutes and seconds. 
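
For reference, the seconds-to-HH:MM:SS conversion behind the "duration" format can be reproduced with java.time.Duration, mirroring the logic in ToStringFunction; a minimal standalone sketch (the class name is illustrative only)::

    import java.time.Duration;

    public class DurationFormatSketch {
      public static void main(String[] args) {
        // 615 seconds = 0 hours, 10 minutes, 15 seconds -> "00:10:15"
        Duration d = Duration.ofSeconds(615);
        System.out.println(
            String.format("%02d:%02d:%02d", d.toHours(), d.toMinutesPart(), d.toSecondsPart()));
      }
    }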
diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index c1dd52a5d89..01b9b85b882 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -396,5 +396,4 @@ Example:: | UPPER('helloworld') | UPPER('HELLOWORLD') | |---------------------+---------------------| | HELLOWORLD | HELLOWORLD | - +---------------------+---------------------+ - + +---------------------+---------------------+ \ No newline at end of file From fc763a431cbbdcad3fe35ab3f0e70f334d0b12a6 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 16 Oct 2025 10:00:16 -0700 Subject: [PATCH 05/99] reverted string doc changes Signed-off-by: Asif Bashar --- docs/user/ppl/functions/string.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index 01b9b85b882..24efa1434f5 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -396,4 +396,4 @@ Example:: | UPPER('helloworld') | UPPER('HELLOWORLD') | |---------------------+---------------------| | HELLOWORLD | HELLOWORLD | - +---------------------+---------------------+ \ No newline at end of file + +---------------------+---------------------+ From a04eb14529029cf51d020e75ba5e7e6f921fb07c Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 16 Oct 2025 10:06:46 -0700 Subject: [PATCH 06/99] removed extra word Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 9d0bf6eab1b..58171d7cc72 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -91,7 +91,7 @@ Usage for boolean parameter without format type: tostring(boolean) converts the Return type: string You can use this function with the eval commands and as part of eval expressions. -The first argument can be a number, number as string or boolean. +The first argument can be a number, number as string or boolean. If first argument can be any valid type , second argument is optional and if provided , it needs to be format name. If first argument is boolean, then second argument is not used even if its provided. @@ -148,9 +148,6 @@ Example:: | SMITH | 80,000 | 80000.00 | +---------------+------------------+------------+ - - duration - os> source=EMP | eval duration = tostring(6500, "duration") | fields ENAME, duration" fetched rows / total rows = 1/1 +---------------+-------------+ From 590a8e63add13b75fa90047cd2eca1ab4d802936 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 16 Oct 2025 10:24:25 -0700 Subject: [PATCH 07/99] added any type Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 38 +++++++++++--------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 58171d7cc72..c4ebe0505de 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -85,27 +85,22 @@ TOSTRING Description >>>>>>>>>>> There are two available usage based on paraemter types and number of parameters. -Usage with format type: tostring(ANY, [format]) converts the number in first argument to provided format type string in second argument. +Usage with format type: tostring(ANY, [format]) converts the number in first argument to provided format type string in second argument. 
If non number type, then it converts to default string representation. Return type: string Usage for boolean parameter without format type: tostring(boolean) converts the string to 'True' or 'False'. Return type: string - You can use this function with the eval commands and as part of eval expressions. -The first argument can be a number, number as string or boolean. -If first argument can be any valid type , second argument is optional and if provided , it needs to be format name. + +If first argument can be any valid type , second argument is optional and if provided , it needs to be format name to convert to where first argument contains only numbers. If first argument is boolean, then second argument is not used even if its provided. -Format types: +Format types:: a) "binary" Converts a number to a binary value. b) "hex" Converts the number to a hexadecimal value. c) "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to nearest two decimal places. d) "duration" Converts the value in seconds to the readable time format HH:MM:SS. The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats. -Binary conversion: -You can use this function to convert a number to a string of its binary representation. For example, the result of the following function is 1001, because the binary representation of 9 is 1001.: -eval result = tostring(9, "binary") - For information about bitwise functions that you can use with the tostring function, see Bitwise functions. Basic examples: @@ -114,17 +109,11 @@ The following example returns "True 0xF 12,345.68". The following example returns foo=615 and foo2=00:10:15. The 615 seconds is converted into minutes and seconds. ... | eval foo=615 | eval foo2 = tostring(foo, "duration") -The following example formats the column totalSales to display values with a currency symbol and commas. You must use a period between the currency value and the tostring function. -Example:: - os> source=people | eval `boolean_str` = tostring(1=1)| fields `boolean_str` - fetched rows / total rows = 1/1 - +---------------------+ - | boolean_str | - |---------------------+ - | True | - +---------------------+ + +You can use this function to convert a number to a string of its binary representation. +Example:: os> source=EMP | eval salary_binary = tostring(SAL, "binary") | fields ENAME, salary_binary, SAL" fetched rows / total rows = 1/1 +---------------+------------------+------------+ @@ -132,6 +121,10 @@ Example:: |---------------+------------------+------------+ | SMITH | 1001110001000000 | 80000.00 | +---------------+------------------+------------+ + + +You can use this function to convert a number to a string of its hex representation. +Example:: os> source=EMP | eval salary_hex = tostring(SAL, "hex") | fields ENAME, salary_hex, SAL" fetched rows / total rows = 1/1 +---------------+------------------+------------+ @@ -140,6 +133,8 @@ Example:: | SMITH | 13880 | 80000.00 | +---------------+---------------+---------------+ +The following example formats the column totalSales to display values with commas. 
+Example::
+
  os> source=EMP | eval salary_commas = tostring(SAL, "commas") | fields ENAME, salary_commas, SAL"
  fetched rows / total rows = 1/1
  +---------------+------------------+------------+
  | ENAME         | salary_commas    | SAL        |
  |---------------+------------------+------------+
  | SMITH         | 80,000           | 80000.00   |
  +---------------+------------------+------------+
-
+The following example converts a number of seconds to HH:MM:SS format, representing hours, minutes, and seconds.
+Example::
+
  os> source=EMP | eval duration = tostring(6500, "duration") | fields ENAME, duration"
  fetched rows / total rows = 1/1
  +---------------+-------------+
  | ENAME         | duration    |
  |---------------+-------------+
  | SMITH         | 01:48:20    |
  +---------------+-------------+
 
-Usage for boolean parameter without format type::
-
+Example for boolean parameter.
 Example::
  os> source=people | eval `boolean_str` = tostring(1=1)| fields `boolean_str`
  fetched rows / total rows = 1/1
  +---------------------+
  | boolean_str         |
  |---------------------+
  | True                |
  +---------------------+

From 6938e2c08dfef04119f1e37802d0a3a9554a0d21 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Thu, 16 Oct 2025 11:00:43 -0700
Subject: [PATCH 08/99] doc formatting fixes

Signed-off-by: Asif Bashar
---
 docs/user/ppl/functions/conversion.rst | 38 ++++++++++++++++----------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst
index c4ebe0505de..280befee0aa 100644
--- a/docs/user/ppl/functions/conversion.rst
+++ b/docs/user/ppl/functions/conversion.rst
@@ -80,40 +80,44 @@ Cast function can be chained::
    +-------+
 
 TOSTRING
------
+-----------
 
 Description
 >>>>>>>>>>>
-There are two available usage based on paraemter types and number of parameters.
+The following usage options are available, depending on the parameter types and the number of parameters.
 
-Usage with format type: tostring(ANY, [format]) converts the number in first argument to provided format type string in second argument. If non number type, then it converts to default string representation.
-    Return type: string
-Usage for boolean parameter without format type: tostring(boolean) converts the string to 'True' or 'False'.
-    Return type: string
-You can use this function with the eval commands and as part of eval expressions.
-
-If first argument can be any valid type , second argument is optional and if provided , it needs to be format name to convert to where first argument contains only numbers.
-If first argument is boolean, then second argument is not used even if its provided.
+Usage with format type, tostring(ANY, [format]): Converts the number in the first argument to a string in the format named by the second argument. Non-numeric values are converted to their default string representation.
+Return type: string
+
+Usage for a boolean parameter without a format type, tostring(boolean): Converts the boolean to 'True' or 'False'.
+Return type: string
+
+You can use this function with the eval command and as part of eval expressions. The first argument can be any valid type. The second argument is optional; when provided, it must be a format name and applies only when the first argument contains a number. If the first argument is boolean, the second argument is ignored even if it is provided.
 
-Format types::
+Format types:
+
 a) "binary" Converts a number to a binary value.
 b) "hex" Converts the number to a hexadecimal value.
 c) "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to nearest two decimal places.
 d) "duration" Converts the value in seconds to the readable time format HH:MM:SS.
+
-The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats.
-
-For information about bitwise functions that you can use with the tostring function, see Bitwise functions.
+The format argument is optional and is only used when the value argument is a number. The tostring function supports the formats listed above.
 
 Basic examples:
+
 The following example returns "True 0xF 12,345.68".
-... | eval n=tostring(1==1) + " " + tostring(15, "hex") + " " + tostring(12345.6789, "commas")
+
+    ... | eval n=tostring(1==1) + " " + tostring(15, "hex") + " " + tostring(12345.6789, "commas")
+
 The following example returns foo=615 and foo2=00:10:15. The 615 seconds is converted into minutes and seconds.
-... | eval foo=615 | eval foo2 = tostring(foo, "duration")
+
+    ... | eval foo=615 | eval foo2 = tostring(foo, "duration")
 
 
 You can use this function to convert a number to a string of its binary representation.
 Example::
+
  os> source=EMP | eval salary_binary = tostring(SAL, "binary") | fields ENAME, salary_binary, SAL"
  fetched rows / total rows = 1/1
  +---------------+------------------+------------+
  | ENAME         | salary_binary    | SAL        |
  |---------------+------------------+------------+
  | SMITH         | 1001110001000000 | 80000.00   |
  +---------------+------------------+------------+
 
 
 You can use this function to convert a number to a string of its hex representation.
 Example::
+
  os> source=EMP | eval salary_hex = tostring(SAL, "hex") | fields ENAME, salary_hex, SAL"
  fetched rows / total rows = 1/1
  +---------------+------------------+------------+
  | ENAME         | salary_hex       | SAL        |
  |---------------+------------------+------------+
  | SMITH         | 13880            | 80000.00   |
  +---------------+------------------+------------+
 
 The following example formats the column totalSales to display values with commas.
 Example::
+
  os> source=EMP | eval salary_commas = tostring(SAL, "commas") | fields ENAME, salary_commas, SAL"
  fetched rows / total rows = 1/1
  +---------------+------------------+------------+
  | ENAME         | salary_commas    | SAL        |
  |---------------+------------------+------------+
  | SMITH         | 80,000           | 80000.00   |
  +---------------+------------------+------------+
 
 The following example converts a number of seconds to HH:MM:SS format, representing hours, minutes, and seconds.
 Example::
+
  os> source=EMP | eval duration = tostring(6500, "duration") | fields ENAME, duration"
  fetched rows / total rows = 1/1
  +---------------+-------------+
  | ENAME         | duration    |
  |---------------+-------------+
  | SMITH         | 01:48:20    |
  +---------------+-------------+
 
 Example for boolean parameter.
 Example::
+
  os> source=people | eval `boolean_str` = tostring(1=1)| fields `boolean_str`
  fetched rows / total rows = 1/1
+
  +---------------------+
  | boolean_str         |
  |---------------------+
  | True                |
  +---------------------+

From 314fccdaee8ab28ede0eab2f1da4e2b83cc24e96 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Thu, 16 Oct 2025 11:04:06 -0700
Subject: [PATCH 09/99] description for boolean example

Signed-off-by: Asif Bashar
---
 docs/user/ppl/functions/conversion.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst
index 280befee0aa..036a89daf00 100644
--- a/docs/user/ppl/functions/conversion.rst
+++ b/docs/user/ppl/functions/conversion.rst
@@ -160,7 +160,7 @@ Example::
    | SMITH         | 01:48:20    |
    +---------------+-------------+
 
-Example for boolean parameter.
+The following example converts a boolean parameter to a string.
Example:: os> source=people | eval `boolean_str` = tostring(1=1)| fields `boolean_str` From 0ee17b9a4a262a1d6070b98ae8a1162f6556f517 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Fri, 17 Oct 2025 13:21:49 -0700 Subject: [PATCH 10/99] added format_time call from calcite , added duration_millis as splunk default duration is in seconds which will be used for duration format , added cast call for tostring with 1 argument Signed-off-by: Asif Bashar --- .../sql/calcite/utils/PPLOperandTypes.java | 2 +- .../function/udf/ToStringFunction.java | 53 ++-- .../function/udf/ToStringFunctionTest.java | 265 +++++++++--------- .../sql/ppl/parser/AstExpressionBuilder.java | 52 +++- .../calcite/CalcitePPLStringFunctionTest.java | 157 +++++++---- 5 files changed, 312 insertions(+), 217 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index 69e1492538c..20811f1af48 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -117,7 +117,7 @@ private PPLOperandTypes() {} (CompositeOperandTypeChecker) OperandTypes.family(SqlTypeFamily.BOOLEAN) .or(OperandTypes.family(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING)) - .or(OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING))); + .or(OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING))); public static final UDFOperandMetadata NUMERIC_NUMERIC_OPTIONAL_NUMERIC = UDFOperandMetadata.wrap( diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java index 70d2a82b13c..be76100f38d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToStringFunction.java @@ -8,7 +8,6 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.text.NumberFormat; -import java.time.Duration; import java.util.List; import java.util.Locale; import org.apache.calcite.adapter.enumerable.NotNullImplementor; @@ -18,6 +17,7 @@ import org.apache.calcite.linq4j.tree.Expression; import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.runtime.SqlFunctions; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.calcite.utils.PPLReturnTypes; @@ -41,9 +41,13 @@ public ToStringFunction() { } public static final String DURATION_FORMAT = "duration"; + public static final String DURATION_MILLIS_FORMAT = "duration_millis"; public static final String HEX_FORMAT = "hex"; public static final String COMMAS_FORMAT = "commas"; public static final String BINARY_FORMAT = "binary"; + public static final SqlFunctions.DateFormatFunction dateTimeFormatter = + new SqlFunctions.DateFormatFunction(); + public static final String format24hour = "%H:%M:%S"; // 24-hour format @Override public SqlReturnTypeInference getReturnTypeInference() { @@ -65,7 +69,13 @@ public Expression implement( Expression format = translatedOperands.get(1); return Expressions.call(ToStringFunction.class, "toString", fieldValue, format); } else { - return Expressions.call(ToStringFunction.class, "toString", fieldValue); + // autoboxes to Boolean + + if (!fieldValue.getType().getTypeName().equals("Boolean")) { + 
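+          // Both branches currently emit the same single-argument toString(...) call.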
return Expressions.call(ToStringFunction.class, "toString", fieldValue); + } else { + return Expressions.call(ToStringFunction.class, "toString", fieldValue); + } } } } @@ -87,13 +97,13 @@ public static String toString(String fieldValue) { @Strict public static String toString(BigDecimal num, String format) { if (format.equals(DURATION_FORMAT)) { - Duration d = Duration.ofSeconds(num.toBigInteger().longValue()); - long hours = d.toHours(); - int minutes = d.toMinutesPart(); - int remainingSeconds = d.toSecondsPart(); - String time_str = String.format("%02d:%02d:%02d", hours, minutes, remainingSeconds); - return time_str; + return dateTimeFormatter.formatTime(format24hour, num.toBigInteger().intValue() * 1000); + + } else if (format.equals(DURATION_MILLIS_FORMAT)) { + + return dateTimeFormatter.formatTime(format24hour, num.toBigInteger().intValue()); + } else if (format.equals(HEX_FORMAT)) { return num.toBigInteger().toString(16); } else if (format.equals(COMMAS_FORMAT)) { @@ -112,12 +122,11 @@ public static String toString(BigDecimal num, String format) { @Strict public static String toString(double num, String format) { if (format.equals(DURATION_FORMAT)) { - Duration d = Duration.ofSeconds(Math.round(num)); - long hours = d.toHours(); - int minutes = d.toMinutesPart(); - int remainingSeconds = d.toSecondsPart(); - String time_str = String.format("%02d:%02d:%02d", hours, minutes, remainingSeconds); - return time_str; + return dateTimeFormatter.formatTime(format24hour, ((int) Math.round(num)) * 1000); + } else if (format.equals(DURATION_MILLIS_FORMAT)) { + + return dateTimeFormatter.formatTime(format24hour, ((int) Math.round(num))); + } else if (format.equals(HEX_FORMAT)) { return Double.toHexString(num); } else if (format.equals(COMMAS_FORMAT)) { @@ -129,17 +138,19 @@ public static String toString(double num, String format) { return Double.toString(num); } + @Strict + public static String toString(short num, String format) { + int i = (int) num; + return toString(i, format); + } + @Strict public static String toString(int num, String format) { if (format.equals(DURATION_FORMAT)) { - - int hours = num / 3600; - int minutes = (num % 3600) / 60; - int seconds = num % 60; - - String time_str = String.format("%02d:%02d:%02d", hours, minutes, seconds); - return time_str; + return dateTimeFormatter.formatTime(format24hour, num * 1000); + } else if (format.equals(DURATION_MILLIS_FORMAT)) { + return dateTimeFormatter.formatTime(format24hour, num); } else if (format.equals(HEX_FORMAT)) { return Integer.toHexString(num); } else if (format.equals(COMMAS_FORMAT)) { diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java index cfd25796b23..f0867f93327 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToStringFunctionTest.java @@ -5,145 +5,144 @@ package org.opensearch.sql.expression.function.udf; -import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; import java.math.BigDecimal; import java.util.Locale; - -import static org.junit.jupiter.api.Assertions.*; +import org.junit.jupiter.api.Test; public class ToStringFunctionTest { private final ToStringFunction function = new ToStringFunction(); - @Test - void testBooleanToString() { - assertEquals("True", ToStringFunction.toString(true)); - assertEquals("False", 
ToStringFunction.toString(false)); - } - - @Test - void testStringBooleanToString() { - assertEquals("True", ToStringFunction.toString("true")); - assertEquals("False", ToStringFunction.toString("false")); - assertEquals("False", ToStringFunction.toString("anythingElse")); - } - - @Test - void testBigDecimalToStringDurationFormat() { - BigDecimal num = new BigDecimal("3661"); // 1 hour 1 minute 1 second - String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); - assertEquals("01:01:01", result); - } - - @Test - void testBigDecimalToStringHexFormat() { - BigDecimal num = new BigDecimal("255"); - String result = ToStringFunction.toString(num, ToStringFunction.HEX_FORMAT); - assertEquals("ff", result); - } - - @Test - void testBigDecimalToStringCommasFormat() { - Locale.setDefault(Locale.US); // Ensure predictable comma placement - BigDecimal num = new BigDecimal("1234567.891"); - String result = ToStringFunction.toString(num, ToStringFunction.COMMAS_FORMAT); - assertTrue(result.contains(",")); - } - - @Test - void testBigDecimalToStringBinaryFormat() { - BigDecimal num = new BigDecimal("10"); - String result = ToStringFunction.toString(num, ToStringFunction.BINARY_FORMAT); - assertEquals("1010", result); - } - - @Test - void testBigDecimalToStringDefault() { - BigDecimal num = new BigDecimal("123.45"); - assertEquals("123.45", ToStringFunction.toString(num, "unknown")); - } - - @Test - void testDoubleToStringDurationFormat() { - double num = 3661.4; - String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); - assertEquals("01:01:01", result); - } - - @Test - void testDoubleToStringHexFormat() { - double num = 10.5; - String result = ToStringFunction.toString(num, ToStringFunction.HEX_FORMAT); - assertTrue(result.startsWith("0x")); - } - - @Test - void testDoubleToStringCommasFormat() { - Locale.setDefault(Locale.US); - double num = 12345.678; - String result = ToStringFunction.toString(num, ToStringFunction.COMMAS_FORMAT); - assertTrue(result.contains(",")); - } - - @Test - void testDoubleToStringBinaryFormat() { - double num = 10.0; - String result = ToStringFunction.toString(num, ToStringFunction.BINARY_FORMAT); - assertNotNull(result); - assertFalse(result.isEmpty()); - } - - @Test - void testDoubleToStringDefault() { - assertEquals("10.5", ToStringFunction.toString(10.5, "unknown")); - } - - @Test - void testIntToStringDurationFormat() { - int num = 3661; - String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); - assertEquals("01:01:01", result); - } - - @Test - void testIntToStringHexFormat() { - assertEquals("ff", ToStringFunction.toString(255, ToStringFunction.HEX_FORMAT)); - } - - @Test - void testIntToStringCommasFormat() { - Locale.setDefault(Locale.US); - String result = ToStringFunction.toString(1234567, ToStringFunction.COMMAS_FORMAT); - assertTrue(result.contains(",")); - } - - @Test - void testIntToStringBinaryFormat() { - assertEquals("1010", ToStringFunction.toString(10, ToStringFunction.BINARY_FORMAT)); - } - - @Test - void testIntToStringDefault() { - assertEquals("123", ToStringFunction.toString(123, "unknown")); - } - - @Test - void testStringNumericToStringIntFormat() { - String result = ToStringFunction.toString("42", ToStringFunction.HEX_FORMAT); - assertEquals("2a", result); - } - - @Test - void testStringNumericToStringDoubleFormat() { - String result = ToStringFunction.toString("42.5", ToStringFunction.COMMAS_FORMAT); - assertTrue(result.contains("42")); - } - - @Test - void 
testStringLargeNumberAsDouble() { - String largeNum = "1234567890123"; - String result = ToStringFunction.toString(largeNum, ToStringFunction.BINARY_FORMAT); - assertNotNull(result); - } + @Test + void testBooleanToString() { + assertEquals("True", ToStringFunction.toString(true)); + assertEquals("False", ToStringFunction.toString(false)); + } + + @Test + void testStringBooleanToString() { + assertEquals("True", ToStringFunction.toString("true")); + assertEquals("False", ToStringFunction.toString("false")); + assertEquals("False", ToStringFunction.toString("anythingElse")); + } + + @Test + void testBigDecimalToStringDurationFormat() { + BigDecimal num = new BigDecimal("3661"); // 1 hour 1 minute 1 second + String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); + assertEquals("01:01:01", result); + } + + @Test + void testBigDecimalToStringHexFormat() { + BigDecimal num = new BigDecimal("255"); + String result = ToStringFunction.toString(num, ToStringFunction.HEX_FORMAT); + assertEquals("ff", result); + } + + @Test + void testBigDecimalToStringCommasFormat() { + Locale.setDefault(Locale.US); // Ensure predictable comma placement + BigDecimal num = new BigDecimal("1234567.891"); + String result = ToStringFunction.toString(num, ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains(",")); + } + + @Test + void testBigDecimalToStringBinaryFormat() { + BigDecimal num = new BigDecimal("10"); + String result = ToStringFunction.toString(num, ToStringFunction.BINARY_FORMAT); + assertEquals("1010", result); + } + + @Test + void testBigDecimalToStringDefault() { + BigDecimal num = new BigDecimal("123.45"); + assertEquals("123.45", ToStringFunction.toString(num, "unknown")); + } + + @Test + void testDoubleToStringDurationFormat() { + double num = 3661.4; + String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); + assertEquals("01:01:01", result); + } + + @Test + void testDoubleToStringHexFormat() { + double num = 10.5; + String result = ToStringFunction.toString(num, ToStringFunction.HEX_FORMAT); + assertTrue(result.startsWith("0x")); + } + + @Test + void testDoubleToStringCommasFormat() { + Locale.setDefault(Locale.US); + double num = 12345.678; + String result = ToStringFunction.toString(num, ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains(",")); + } + + @Test + void testDoubleToStringBinaryFormat() { + double num = 10.0; + String result = ToStringFunction.toString(num, ToStringFunction.BINARY_FORMAT); + assertNotNull(result); + assertFalse(result.isEmpty()); + } + + @Test + void testDoubleToStringDefault() { + assertEquals("10.5", ToStringFunction.toString(10.5, "unknown")); + } + + @Test + void testIntToStringDurationFormat() { + int num = 3661; + String result = ToStringFunction.toString(num, ToStringFunction.DURATION_FORMAT); + assertEquals("01:01:01", result); + } + + @Test + void testIntToStringHexFormat() { + assertEquals("ff", ToStringFunction.toString(255, ToStringFunction.HEX_FORMAT)); + } + + @Test + void testIntToStringCommasFormat() { + Locale.setDefault(Locale.US); + String result = ToStringFunction.toString(1234567, ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains(",")); + } + + @Test + void testIntToStringBinaryFormat() { + assertEquals("1010", ToStringFunction.toString(10, ToStringFunction.BINARY_FORMAT)); + } + + @Test + void testIntToStringDefault() { + assertEquals("123", ToStringFunction.toString(123, "unknown")); + } + + @Test + void testStringNumericToStringIntFormat() { + String result = 
ToStringFunction.toString("42", ToStringFunction.HEX_FORMAT); + assertEquals("2a", result); + } + + @Test + void testStringNumericToStringDoubleFormat() { + String result = ToStringFunction.toString("42.5", ToStringFunction.COMMAS_FORMAT); + assertTrue(result.contains("42")); + } + + @Test + void testStringLargeNumberAsDouble() { + String largeNum = "1234567890123"; + String result = ToStringFunction.toString(largeNum, ToStringFunction.BINARY_FORMAT); + assertNotNull(result); + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 1460a3efc9a..2112282f728 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -19,6 +19,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; +import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.RuleContext; import org.antlr.v4.runtime.tree.ParseTree; @@ -29,8 +30,10 @@ import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.calcite.plan.OpenSearchConstants; +import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BinaryArithmeticContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BooleanLiteralContext; @@ -411,18 +414,51 @@ private Function buildFunction( functionName, args.stream().map(this::visitFunctionArg).collect(Collectors.toList())); } + public DataTypeFunctionCallContext createDataTypeFunctionCallContext(String castExpression) { + // Create a case-insensitive character stream from the input + CaseInsensitiveCharStream charStream = new CaseInsensitiveCharStream(castExpression); + + // Create lexer and parser + OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(charStream); + CommonTokenStream tokens = new CommonTokenStream(lexer); + OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); + + // Parse the expression - cast is part of evalFunctionCall + DataTypeFunctionCallContext evalContext = parser.dataTypeFunctionCall(); + return evalContext; + } + /** Cast function. 
*/
  @Override
  public UnresolvedExpression visitDataTypeFunctionCall(DataTypeFunctionCallContext ctx) {
-    ParseTree rootNode = ctx.getChild(0);
-    String functionName = rootNode.getText();
-    final String mappedName =
-        FUNCTION_NAME_MAPPING.getOrDefault(functionName.toLowerCase(Locale.ROOT), functionName);
-
-    if (mappedName.equals("cast")) {
-      return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType()));
+    if (ctx.functionArgs() != null) {
+
+      ParseTree rootNode = ctx.getChild(0);
+      String functionName = rootNode.getText();
+      final String mappedName =
+          FUNCTION_NAME_MAPPING.getOrDefault(functionName.toLowerCase(Locale.ROOT), functionName);
+      System.out.println(mappedName);
+      if (mappedName != null && mappedName.equals("tostring")) {
+        if (ctx.functionArgs().functionArg().size() == 1) {
+          List functionArgs =
+              ctx.functionArgs().functionArg();
+
+          String castExpression =
+              String.format("cast( %s as String)", functionArgs.getFirst().getText());
+          DataTypeFunctionCallContext toStringDataTypeConversionContext =
+              this.createDataTypeFunctionCallContext(castExpression);
+          return new Cast(
+              visit(toStringDataTypeConversionContext.logicalExpression()),
+              visit(toStringDataTypeConversionContext.convertedDataType()));
+          //
+        } else {
+          return buildFunction(mappedName, ctx.functionArgs().functionArg());
+        }
+      } else {
+        return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType()));
+      }
     } else {
-      return buildFunction(mappedName, ctx.functionArgs().functionArg());
+      return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType()));
     }
   }

diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java
index d41b2c22453..b32ecd04e3f 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java
@@ -46,20 +46,55 @@ public void testLower() {
     verifyPPLToSparkSQL(root, expectedSparkSql);
   }
 
+  // This test evaluates tostring where it gets converted to a cast call
+
+  @Test
+  public void testToStringFormatNotSpecified() {
+    String ppl =
+        "source=EMP | eval string_value = tostring(MGR) | eval cast_value = cast(MGR as string)|"
+            + " fields string_value, cast_value";
+    RelNode root = getRelNode(ppl);
+    String expectedLogical =
+        "LogicalProject(string_value=[SAFE_CAST($3)], cast_value=[SAFE_CAST($3)])\n"
+            + "  LogicalTableScan(table=[[scott, EMP]])\n";
+    String expectedResult =
+        "string_value=7902; cast_value=7902\n"
+            + "string_value=7698; cast_value=7698\n"
+            + "string_value=7698; cast_value=7698\n"
+            + "string_value=7839; cast_value=7839\n"
+            + "string_value=7698; cast_value=7698\n"
+            + "string_value=7839; cast_value=7839\n"
+            + "string_value=7839; cast_value=7839\n"
+            + "string_value=7566; cast_value=7566\n"
+            + "string_value=null; cast_value=null\n"
+            + "string_value=7698; cast_value=7698\n"
+            + "string_value=7788; cast_value=7788\n"
+            + "string_value=7698; cast_value=7698\n"
+            + "string_value=7566; cast_value=7566\n"
+            + "string_value=7782; cast_value=7782\n";
+    verifyLogical(root, expectedLogical);
+    verifyResult(root, expectedResult);
+
+    String expectedSparkSql =
+        "SELECT SAFE_CAST(`MGR` AS STRING) `string_value`, SAFE_CAST(`MGR` AS STRING)"
+            + " `cast_value`\n"
+            + "FROM `scott`.`EMP`";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
   @Test
   public void testToStringBoolean() {
     String ppl =
         "source=EMP | eval
boolean_value = tostring(1==1) | fields boolean_value |head 1"; RelNode root = getRelNode(ppl); String expectedLogical = "LogicalSort(fetch=[1])\n" - + " LogicalProject(boolean_value=[TOSTRING(true)])\n" + + " LogicalProject(boolean_value=['TRUE':VARCHAR])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; - String expectedResult = "boolean_value=True\n"; + String expectedResult = "boolean_value=TRUE\n"; verifyLogical(root, expectedLogical); verifyResult(root, expectedResult); - String expectedSparkSql = - "SELECT `TOSTRING`(TRUE) `boolean_value`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + String expectedSparkSql = "SELECT 'TRUE' `boolean_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -126,56 +161,70 @@ public void testToStringHex() { verifyPPLToSparkSQL(root, expectedSparkSql); } - @Test - public void testToStringHexFromNumberAsString() { - String ppl = - "source=EMP | eval salary_hex = tostring(\"1600\", \"hex\") | fields ENAME, salary_hex| head 1"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalSort(fetch=[1])\n LogicalProject(ENAME=[$1], salary_hex=[TOSTRING('1600':VARCHAR, 'hex':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; - String expectedResult = - "ENAME=SMITH; salary_hex=640\n"; - verifyLogical(root, expectedLogical); - verifyResult(root, expectedResult); - - String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`('1600', 'hex') `salary_hex`\nFROM `scott`.`EMP`\nLIMIT 1"; - verifyPPLToSparkSQL(root, expectedSparkSql); - } - - @Test - public void testToStringCommaFromNumberAsString() { - String ppl = - "source=EMP | eval salary_comma = tostring(\"160040222\", \"commas\") | fields ENAME, salary_comma| head 1"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalSort(fetch=[1])\n LogicalProject(ENAME=[$1], salary_comma=[TOSTRING('160040222':VARCHAR, 'commas':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; - String expectedResult = - "ENAME=SMITH; salary_comma=160,040,222\n"; - verifyLogical(root, expectedLogical); - verifyResult(root, expectedResult); - - String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`('160040222', 'commas') `salary_comma`\nFROM `scott`.`EMP`\nLIMIT 1"; - verifyPPLToSparkSQL(root, expectedSparkSql); - } - @Test - public void testToStringBinaryFromNumberAsString() { - String ppl = - "source=EMP | eval salary_binary = tostring(\"160040222\", \"binary\") | fields ENAME, salary_binary| head 1"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalSort(fetch=[1])\n LogicalProject(ENAME=[$1], salary_binary=[TOSTRING('160040222':VARCHAR, 'binary':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; - String expectedResult = - "ENAME=SMITH; salary_binary=1001100010100000010100011110\n"; - verifyLogical(root, expectedLogical); - verifyResult(root, expectedResult); - - String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`('160040222', 'binary') `salary_binary`\nFROM `scott`.`EMP`\nLIMIT 1"; - verifyPPLToSparkSQL(root, expectedSparkSql); - } - @Test + @Test + public void testToStringHexFromNumberAsString() { + String ppl = + "source=EMP | eval salary_hex = tostring(\"1600\", \"hex\") | fields ENAME, salary_hex|" + + " head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(ENAME=[$1], salary_hex=[TOSTRING('1600':VARCHAR, 'hex':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + String expectedResult = "ENAME=SMITH; salary_hex=640\n"; + verifyLogical(root, expectedLogical); 
+    verifyResult(root, expectedResult);
+
+    String expectedSparkSql =
+        "SELECT `ENAME`, `TOSTRING`('1600', 'hex') `salary_hex`\nFROM `scott`.`EMP`\nLIMIT 1";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
+  @Test
+  public void testToStringCommaFromNumberAsString() {
+    String ppl =
+        "source=EMP | eval salary_comma = tostring(\"160040222\", \"commas\") | fields ENAME,"
+            + " salary_comma| head 1";
+    RelNode root = getRelNode(ppl);
+    String expectedLogical =
+        "LogicalSort(fetch=[1])\n"
+            + "  LogicalProject(ENAME=[$1], salary_comma=[TOSTRING('160040222':VARCHAR,"
+            + " 'commas':VARCHAR)])\n"
+            + "    LogicalTableScan(table=[[scott, EMP]])\n";
+    String expectedResult = "ENAME=SMITH; salary_comma=160,040,222\n";
+    verifyLogical(root, expectedLogical);
+    verifyResult(root, expectedResult);
+
+    String expectedSparkSql =
+        "SELECT `ENAME`, `TOSTRING`('160040222', 'commas') `salary_comma`\n"
+            + "FROM `scott`.`EMP`\n"
+            + "LIMIT 1";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
+  @Test
+  public void testToStringBinaryFromNumberAsString() {
+    String ppl =
+        "source=EMP | eval salary_binary = tostring(\"160040222\", \"binary\") | fields ENAME,"
+            + " salary_binary| head 1";
+    RelNode root = getRelNode(ppl);
+    String expectedLogical =
+        "LogicalSort(fetch=[1])\n"
+            + "  LogicalProject(ENAME=[$1], salary_binary=[TOSTRING('160040222':VARCHAR,"
+            + " 'binary':VARCHAR)])\n"
+            + "    LogicalTableScan(table=[[scott, EMP]])\n";
+    String expectedResult = "ENAME=SMITH; salary_binary=1001100010100000010100011110\n";
+    verifyLogical(root, expectedLogical);
+    verifyResult(root, expectedResult);
+
+    String expectedSparkSql =
+        "SELECT `ENAME`, `TOSTRING`('160040222', 'binary') `salary_binary`\n"
+            + "FROM `scott`.`EMP`\n"
+            + "LIMIT 1";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
+  @Test
   public void testToStringCommas() {
     String ppl =
         "source=EMP | eval salary_commas = tostring(SAL, \"commas\") | fields ENAME,"

From 6e24aa3c4b16f6a5734db18f210b8b5c1f5f245f Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Fri, 17 Oct 2025 13:32:18 -0700
Subject: [PATCH 11/99] added doc update to explicitly mark the 2nd argument
 as optional

Signed-off-by: Asif Bashar
---
 docs/user/ppl/functions/conversion.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst
index 036a89daf00..9f9646efef4 100644
--- a/docs/user/ppl/functions/conversion.rst
+++ b/docs/user/ppl/functions/conversion.rst
@@ -86,10 +86,10 @@ Description
 >>>>>>>>>>>
 
 The following usage options are available, depending on the parameter types and the number of parameters.
-Usage with format type: tostring(ANY, [format]): Converts the number in first argument to provided format type string in second argument. If non number type, then it converts to default string representation.
+Usage with format type: tostring(ANY, [format]): Converts the number in first argument to provided format type string in second argument. If second argument is not provided, then it converts to default string representation.
 Return type: string
 
-Usage for boolean parameter without format type tostring(boolean): Converts the string to 'True' or 'False'.
+Usage for boolean parameter without format type tostring(boolean): Converts the boolean to 'TRUE' or 'FALSE'.
 Return type: string
 
 You can use this function with the eval commands and as part of eval expressions.
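A minimal sketch of the dispatch these rules describe, in plain Java (`toStringSketch` is an illustrative name; the actual UDF resolves the overload through Calcite's type system)::

    static String toStringSketch(Object value, String format) {
      if (value instanceof Boolean) {
        return ((Boolean) value) ? "TRUE" : "FALSE"; // format argument is ignored for booleans
      }
      if (format == null) {
        return String.valueOf(value); // default string representation
      }
      long n = ((Number) value).longValue(); // assumes a numeric first argument
      switch (format) {
        case "hex":
          return Long.toHexString(n);
        case "binary":
          return Long.toBinaryString(n);
        case "commas":
          java.text.NumberFormat nf = java.text.NumberFormat.getInstance(java.util.Locale.US);
          nf.setMaximumFractionDigits(2); // the docs round to two decimal places
          return nf.format(value);
        case "duration": // seconds -> HH:MM:SS
          return String.format("%02d:%02d:%02d", n / 3600, (n % 3600) / 60, n % 60);
        default:
          return String.valueOf(value);
      }
    }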
The first argument can be any valid type. The second argument is optional; if provided, it must be the name of the format to convert to, and the first argument must then contain only numbers. If the first argument is boolean, the second argument is ignored even if it is provided.
@@ -100,12 +100,13 @@
 a) "binary" Converts a number to a binary value.
 b) "hex" Converts the number to a hexadecimal value.
 c) "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to the nearest two decimal places.
 d) "duration" Converts the value in seconds to the readable time format HH:MM:SS.
+5) "duration_millis" Converts the value in milliseconds to the readable time format HH:MM:SS.
 
 The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats.
 
 Basic examples:
-The following example returns "True 0xF 12,345.68".
+The following example returns "TRUE 0xF 12,345.68".
 
 ... | eval n=tostring(1==1) + " " + tostring(15, "hex") + " " + tostring(12345.6789, "commas")
 
@@ -169,5 +170,5 @@ Example::
     +---------------------+
     | boolean_str         |
     |---------------------+
-    | True                |
+    | TRUE                |
     +---------------------+
\ No newline at end of file

From 454cfc8db324866376f2452290b0fd0739008754 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Fri, 17 Oct 2025 13:33:59 -0700
Subject: [PATCH 12/99] mentioned "value" instead of "number" specifically

Signed-off-by: Asif Bashar
---
 docs/user/ppl/functions/conversion.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst
index 9f9646efef4..4f2d432c71a 100644
--- a/docs/user/ppl/functions/conversion.rst
+++ b/docs/user/ppl/functions/conversion.rst
@@ -86,7 +86,7 @@ Description
 >>>>>>>>>>>
 
 The following usage options are available, depending on the parameter types and the number of parameters.
-Usage with format type: tostring(ANY, [format]): Converts the number in first argument to provided format type string in second argument. If second argument is not provided, then it converts to default string representation.
+Usage with format type: tostring(ANY, [format]): Converts the value in first argument to provided format type string in second argument. If second argument is not provided, then it converts to default string representation.
 Return type: string

From b221e8c4f583b947198253ee90b5edb6f15b382a Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Fri, 17 Oct 2025 13:46:56 -0700
Subject: [PATCH 13/99] fixed wrong bullet point

Signed-off-by: Asif Bashar
---
 docs/user/ppl/functions/conversion.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst
index 4f2d432c71a..708bde1fa11 100644
--- a/docs/user/ppl/functions/conversion.rst
+++ b/docs/user/ppl/functions/conversion.rst
@@ -100,7 +100,7 @@
 a) "binary" Converts a number to a binary value.
 b) "hex" Converts the number to a hexadecimal value.
 c) "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to the nearest two decimal places.
 d) "duration" Converts the value in seconds to the readable time format HH:MM:SS.
-5) "duration_millis" Converts the value in milliseconds to the readable time format HH:MM:SS.
+e) "duration_millis" Converts the value in milliseconds to the readable time format HH:MM:SS.
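The two duration formats differ only in the unit of the input value. A minimal sketch of the conversion in plain Java (the UDF itself delegates to Calcite's SqlFunctions.DateFormatFunction.formatTime with the "%H:%M:%S" pattern)::

    static String duration(long seconds) { // "duration": input is in seconds
      return String.format("%02d:%02d:%02d", seconds / 3600, (seconds % 3600) / 60, seconds % 60);
    }

    static String durationMillis(long millis) { // "duration_millis": input is in milliseconds
      return duration(millis / 1000);
    }

    // duration(3661)          -> "01:01:01"
    // durationMillis(3661000) -> "01:01:01"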
The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats. From ea9ba0fa8b5b63053df344930dc218e83dc75f91 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 20 Oct 2025 12:41:39 -0700 Subject: [PATCH 14/99] added more unit tests Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunctionTest.java | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java new file mode 100644 index 00000000000..d7f2b6fd685 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -0,0 +1,192 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import static org.junit.jupiter.api.Assertions.*; + +import java.math.BigDecimal; +import java.util.Locale; +import org.apache.calcite.sql.type.ReturnTypes; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; + +public class ToNumberFunctionTest { + + private final ToNumberFunction function = new ToNumberFunction(); + + @Test + void testGetReturnTypeInference() { + assertEquals(ReturnTypes.DOUBLE_FORCE_NULLABLE, function.getReturnTypeInference()); + } + + @Test + void testGetOperandMetadata() { + assertEquals(PPLOperandTypes.STRING_OR_STRING_INTEGER, function.getOperandMetadata()); + } + + @Test + void testToNumberWithDefaultBase() { + assertEquals(123, ToNumberFunction.toNumber("123")); + assertEquals(0, ToNumberFunction.toNumber("0")); + assertEquals(-456, ToNumberFunction.toNumber("-456")); + assertEquals(123.45, ToNumberFunction.toNumber("123.45")); + assertEquals(-123.45, ToNumberFunction.toNumber("-123.45")); + assertEquals(0.5, ToNumberFunction.toNumber("0.5")); + assertEquals(-0.5, ToNumberFunction.toNumber("-0.5")); + } + + @Test + void testToNumberWithBase10() { + assertEquals(123, ToNumberFunction.toNumber("123", 10)); + assertEquals(0, ToNumberFunction.toNumber("0", 10)); + assertEquals(-456, ToNumberFunction.toNumber("-456", 10)); + assertEquals(123.45, ToNumberFunction.toNumber("123.45", 10)); + assertEquals(-123.45, ToNumberFunction.toNumber("-123.45", 10)); + } + + @Test + void testToNumberWithBase2() { + assertEquals(5, ToNumberFunction.toNumber("101", 2)); + assertEquals(0, ToNumberFunction.toNumber("0", 2)); + assertEquals(1, ToNumberFunction.toNumber("1", 2)); + assertEquals(7, ToNumberFunction.toNumber("111", 2)); + assertEquals(10, ToNumberFunction.toNumber("1010", 2)); + } + + @Test + void testToNumberWithBase8() { + assertEquals(64, ToNumberFunction.toNumber("100", 8)); + assertEquals(8, ToNumberFunction.toNumber("10", 8)); + assertEquals(83, ToNumberFunction.toNumber("123", 8)); + assertEquals(511, ToNumberFunction.toNumber("777", 8)); + } + + @Test + void testToNumberWithBase16() { + assertEquals(255, ToNumberFunction.toNumber("FF", 16)); + assertEquals(16, ToNumberFunction.toNumber("10", 16)); + assertEquals(171, ToNumberFunction.toNumber("AB", 16)); + assertEquals(291, ToNumberFunction.toNumber("123", 16)); + assertEquals(4095, ToNumberFunction.toNumber("FFF", 16)); + } + + @Test + void testToNumberWithBase36() { + assertEquals(35, ToNumberFunction.toNumber("Z", 36)); + 
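    // Base-36 digits run 0-9 then A-Z: "Z" = 35, "ZZ" = 35 * 36 + 35 = 1295,
+    // and "ZZZ" = 35 * 36^2 + 35 * 36 + 35 = 46655.
+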
assertEquals(1295, ToNumberFunction.toNumber("ZZ", 36)); + assertEquals(46655, ToNumberFunction.toNumber("ZZZ", 36)); + } + + @Test + void testToNumberWithDecimalBase2() { + assertEquals(2.5, ToNumberFunction.toNumber("10.1", 2)); + assertEquals(1.5, ToNumberFunction.toNumber("1.1", 2)); + assertEquals(3.75, ToNumberFunction.toNumber("11.11", 2)); + } + + @Test + void testToNumberWithDecimalBase16() { + assertEquals(255.5, ToNumberFunction.toNumber("FF.8", 16)); + assertEquals(16.25, ToNumberFunction.toNumber("10.4", 16)); + assertEquals(171.6875, ToNumberFunction.toNumber("AB.B", 16)); + } + + @Test + void testToNumberWithNegativeDecimal() { + assertEquals(-2.5, ToNumberFunction.toNumber("-10.1", 2)); + assertEquals(-255.5, ToNumberFunction.toNumber("-FF.8", 16)); + assertEquals(-123.45, ToNumberFunction.toNumber("-123.45", 10)); + } + + @Test + void testToNumberWithEmptyFractionalPart() { + assertEquals(123.0, ToNumberFunction.toNumber("123.", 10)); + assertEquals(255.0, ToNumberFunction.toNumber("FF.", 16)); + assertEquals(5.0, ToNumberFunction.toNumber("101.", 2)); + } + + @Test + void testToNumberWithZeroIntegerPart() { + assertEquals(0.5, ToNumberFunction.toNumber("0.5", 10)); + assertEquals(0.5, ToNumberFunction.toNumber("0.1", 2)); + } + + @Test + void testToNumberInvalidBase() { + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("123", 1); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("123", 37); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("123", 0); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("123", -1); + }); + } + + @Test + void testToNumberInvalidDigits() { + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("12A", 10); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("102", 2); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("189", 8); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("GHI", 16); + }); + } + + @Test + void testToNumberInvalidFractionalDigits() { + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("10.2", 2); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("FF.G", 16); + }); + + assertThrows(IllegalArgumentException.class, () -> { + ToNumberFunction.toNumber("123.ABC", 10); + }); + } + + @Test + void testToNumberEdgeCases() { + assertEquals(0, ToNumberFunction.toNumber("0", 2)); + assertEquals(0, ToNumberFunction.toNumber("0", 36)); + assertEquals(0.0, ToNumberFunction.toNumber("0.0", 10)); + assertEquals(0.0, ToNumberFunction.toNumber("0.000", 10)); + } + + @Test + void testToNumberLargeNumbers() { + assertEquals(Integer.MAX_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MAX_VALUE), 10)); + assertEquals(Integer.MIN_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MIN_VALUE), 10)); + } + + @Test + void testToNumberCaseInsensitivity() { + assertEquals(255, ToNumberFunction.toNumber("ff", 16)); + assertEquals(255, ToNumberFunction.toNumber("FF", 16)); + assertEquals(255, ToNumberFunction.toNumber("fF", 16)); + assertEquals(171, ToNumberFunction.toNumber("ab", 16)); + assertEquals(171, ToNumberFunction.toNumber("AB", 16)); + } +} From 370b9bc5d30ec90fbd7dc668ca5ee0939a720f87 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 21 Oct 2025 21:03:42 
-0700
Subject: [PATCH 15/99] Update docs/user/ppl/functions/conversion.rst

Co-authored-by: ritvibhatt <53196324+ritvibhatt@users.noreply.github.com>
Signed-off-by: Asif Bashar
Signed-off-by: Asif Bashar
---
 docs/user/ppl/functions/conversion.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst
index fad37c0bf26..d9da1bf3d8e 100644
--- a/docs/user/ppl/functions/conversion.rst
+++ b/docs/user/ppl/functions/conversion.rst
@@ -86,7 +86,7 @@ Description
 >>>>>>>>>>>
 
 The following usage options are available, depending on the parameter types and the number of parameters.
-Usage with format type: tonumber(string, [base]): Converts the value in first argument to provided base type string in second argument. If second argument is not provided, then it converts to base 10 number representation.
+Usage: tonumber(string, [base]) converts the string in the first argument to a number, using the base given in the second argument. If the second argument is not provided, the string is converted as a base 10 number.
 
 Return type: Number

From f0706849c4446e47615a2228debbea69c069d0b0 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Tue, 21 Oct 2025 21:20:00 -0700
Subject: [PATCH 16/99] fix per recommendation

Signed-off-by: Asif Bashar
---
 docs/user/ppl/functions/conversion.rst       | 34 ++++++++-----------
 ppl/src/main/antlr/OpenSearchPPLParser.g4    |  4 +--
 .../sql/ppl/parser/AstExpressionBuilder.java | 32 -----------------
 3 files changed, 16 insertions(+), 54 deletions(-)

diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst
index d9da1bf3d8e..3264d422ce1 100644
--- a/docs/user/ppl/functions/conversion.rst
+++ b/docs/user/ppl/functions/conversion.rst
@@ -84,39 +84,34 @@ TONUMBER
 Description
 >>>>>>>>>>>
+
 The following usage options are available, depending on the parameter types and the number of parameters.
 
 Usage: tonumber(string, [base]) converts the string in the first argument to a number, using the base given in the second argument. If the second argument is not provided, the string is converted as a base 10 number.
+
 Return type: Number
 
 You can use this function with the eval commands and as part of eval expressions. Base values can be between 2 and 36.
-
-
-Basic examples: You can use this function to convert a string representation of a binary number to return the corresponding number in base 10.
-For example, the result of the following function is 5:
-eval result = tonumber("0101", 2)
+
+The following example converts a string in binary to the number representation::
 
-Example::
-Following example converts a string in binary to the number representation.
-    os> source=EMP | eval int_value = tonumber('010101',2) | fields int_value|head 1
+    os> source=EMP | eval int_value = tonumber('010101',2) | fields int_value | head 1
     fetched rows / total rows = 1/1
-    +--------------+
-    | int_value    |
-    |--------------+
-    | 21.0         |
-    +--------------+
+    +---------------+
+    | int_value     |
+    |---------------+
+    | 21.0          |
+    +---------------+
 
+The following example converts a string in hex to the number representation::
-Following example converts a string in hex to the number representation.
-Example::
-    os> source=EMP | eval int_value = tonumber('FA34',16) | fields int_value|head 1
+    os> source=EMP | eval int_value = tonumber('FA34',16) | fields int_value | head 1
     fetched rows / total rows = 1/1
     +---------------+
     | int_value     |
     |---------------+
     | 64052.0       |
     +---------------+
 
-Following example converts a string in decimal to the number representation.
-Example::
+The following example converts a string in decimal to the number representation::
 
-    os> source=EMP | eval int_value = tonumber('4598') | fields int_value|head 1
+    os> source=EMP | eval int_value = tonumber('4598') | fields int_value | head 1
     fetched rows / total rows = 1/1
     +---------------+
     | int_value     |

diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4
index e974c303375..a26b7381797 100644
--- a/ppl/src/main/antlr/OpenSearchPPLParser.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4
@@ -848,10 +848,9 @@ evalFunctionCall
   ;
 
-// cast, tostring function
+// cast function
 dataTypeFunctionCall
   : CAST LT_PRTHS logicalExpression AS convertedDataType RT_PRTHS
-  | TONUMBER LT_PRTHS functionArgs RT_PRTHS
   ;
 
@@ -1234,6 +1233,7 @@ textFunctionName
   | LOCATE
   | REPLACE
   | REVERSE
+  | TONUMBER
   ;
 
 positionFunctionName

diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
index 244f287c77a..b70226d039a 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java
@@ -431,39 +431,7 @@ public DataTypeFunctionCallContext createDataTypeFunctionCallContext(String cast
   /** Cast function. */
   @Override
   public UnresolvedExpression visitDataTypeFunctionCall(DataTypeFunctionCallContext ctx) {
-    if (ctx.functionArgs() != null) {
-
-      ParseTree rootNode = ctx.getChild(0);
-      String functionName = rootNode.getText();
-      final String mappedName =
-          FUNCTION_NAME_MAPPING.getOrDefault(functionName.toLowerCase(Locale.ROOT), functionName);
-      System.out.println(mappedName);
-      if (mappedName != null && mappedName.equals("tostring")) {
-        if (ctx.functionArgs().functionArg().size() == 1) {
-          List functionArgs =
-              ctx.functionArgs().functionArg();
-
-          String castExpression =
-              String.format("cast( %s as String)", functionArgs.getFirst().getText());
-          DataTypeFunctionCallContext toStringDataTypeConversionContext =
-              this.createDataTypeFunctionCallContext(castExpression);
-          return new Cast(
-              visit(toStringDataTypeConversionContext.logicalExpression()),
-              visit(toStringDataTypeConversionContext.convertedDataType()));
-          //
-        } else {
-          return buildFunction(mappedName, ctx.functionArgs().functionArg());
-        }
-      } else if (mappedName != null && mappedName.equals("tonumber")) {
-
-        return buildFunction(mappedName, ctx.functionArgs().functionArg());
-
-      } else {
-        return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType()));
-      }
-    } else {
     return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType()));
-    }
   }
 
   @Override

From a2accf5bc8caa7ad804fba592cc15954dc8fbd28 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Tue, 21 Oct 2025 21:33:51 -0700
Subject: [PATCH 17/99] updated recommended changes

Signed-off-by: Asif Bashar
---
 .../function/PPLBuiltinOperators.java         |   9 +-
 .../expression/function/PPLFuncImpTable.java  | 226 +++++++++++++++++-
 2 files changed, 233 insertions(+), 2 deletions(-)

diff --git 
a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 14f82947c74..7fd768a8d8b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -60,7 +60,14 @@ import org.opensearch.sql.expression.function.jsonUDF.JsonFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonKeysFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonSetFunctionImpl; -import org.opensearch.sql.expression.function.udf.*; +import org.opensearch.sql.expression.function.udf.CryptographicFunction; +import org.opensearch.sql.expression.function.udf.ParseFunction; +import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction; +import org.opensearch.sql.expression.function.udf.RexExtractFunction; +import org.opensearch.sql.expression.function.udf.RexExtractMultiFunction; +import org.opensearch.sql.expression.function.udf.RexOffsetFunction; +import org.opensearch.sql.expression.function.udf.SpanFunction; +import org.opensearch.sql.expression.function.udf.ToNumberFunction; import org.opensearch.sql.expression.function.udf.condition.EarliestFunction; import org.opensearch.sql.expression.function.udf.condition.EnhancedCoalesceFunction; import org.opensearch.sql.expression.function.udf.condition.LatestFunction; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 6f61eb14b48..d9e594f089f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -8,7 +8,231 @@ import static org.apache.calcite.sql.SqlJsonConstructorNullClause.NULL_ON_NULL; import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.getLegacyTypeName; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.*; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ABS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ACOS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADD; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDDATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.AND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASCII; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASIN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ATAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ATAN2; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.AVG; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CBRT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CEIL; +import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.CEILING; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CIDRMATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.COALESCE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CONCAT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CONCAT_WS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CONV; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CONVERT_TZ; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.COS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.COSH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.COT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.COUNT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CRC32; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURDATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURRENT_DATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURRENT_TIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURRENT_TIMESTAMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CURTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATEDIFF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATETIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATE_ADD; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATE_FORMAT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DATE_SUB; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAYNAME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAYOFMONTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAYOFWEEK; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAYOFYEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_MONTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_WEEK; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DEGREES; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDEFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.E; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.EARLIEST; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.EQUAL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.EXISTS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.EXP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.EXPM1; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.EXTRACT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.FILTER; +import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.FIRST; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.FLOOR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.FORALL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.FROM_DAYS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.FROM_UNIXTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.GET_FORMAT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.GREATER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.GTE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.HOUR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.HOUR_OF_DAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IFNULL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_GROK; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_ITEM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PARSE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PATTERN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PATTERN_PARSER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_5; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_PG_4; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_TRANSLATE3; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_BLANK; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_EMPTY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NULL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_PRESENT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_APPEND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_ARRAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_ARRAY_LENGTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_DELETE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_EXTEND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_EXTRACT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_EXTRACT_ALL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_KEYS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_OBJECT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_SET; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.JSON_VALID; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LAST; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LAST_DAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LATEST; 
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.LEFT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LENGTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LESS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIKE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LIST; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOCALTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOCALTIMESTAMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOCATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOG; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOG10; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOG2; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOWER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTRIM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAKEDATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAKETIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAP_APPEND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAP_CONCAT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAP_REMOVE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_BOOL_PREFIX; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_DAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_HOUR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MOD; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MODULUS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MODULUSFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTHNAME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVAPPEND; +import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOW; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.NULLIF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.OR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.PERCENTILE_APPROX; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.PERIOD_ADD; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.PERIOD_DIFF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.PI; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.POSITION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.POW; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.POWER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUARTER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY_STRING; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RADIANS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT_MULTI; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_OFFSET; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RINT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ROUND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RTRIM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SECOND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SECOND_OF_MINUTE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SEC_TO_TIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SHA1; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SHA2; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIGN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIGNUM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIMPLE_QUERY_STRING; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SINH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SQRT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_POP; +import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_SAMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.STRCMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.STRFTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.STR_TO_DATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBDATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBSTR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBSTRING; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBTRACT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBTRACTFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SYSDATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TAKE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMEDIFF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPADD; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPDIFF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIME_FORMAT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIME_TO_SEC; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TO_DAYS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TO_SECONDS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRANSFORM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRIM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRUNCATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TYPEOF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.UNIX_TIMESTAMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.UPPER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.UTC_DATE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.UTC_TIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.UTC_TIMESTAMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.VALUES; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.VARPOP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.VARSAMP; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKDAY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKOFYEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.XOR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TONUMBER; import 
com.google.common.collect.ImmutableMap; import java.math.BigDecimal; From 3adda5d37467116c9e8b1591832755281361ce9e Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 21 Oct 2025 21:36:28 -0700 Subject: [PATCH 18/99] updated recommended changes Signed-off-by: Asif Bashar --- .../sql/ppl/parser/AstExpressionBuilder.java | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index b70226d039a..f7f6b3e914a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -414,24 +414,10 @@ private Function buildFunction( functionName, args.stream().map(this::visitFunctionArg).collect(Collectors.toList())); } - public DataTypeFunctionCallContext createDataTypeFunctionCallContext(String castExpression) { - // Create a case-insensitive character stream from the input - CaseInsensitiveCharStream charStream = new CaseInsensitiveCharStream(castExpression); - - // Create lexer and parser - OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(charStream); - CommonTokenStream tokens = new CommonTokenStream(lexer); - OpenSearchPPLParser parser = new OpenSearchPPLParser(tokens); - - // Parse the expression - cast is part of evalFunctionCall - DataTypeFunctionCallContext evalContext = parser.dataTypeFunctionCall(); - return evalContext; - } - /** Cast function. */ @Override public UnresolvedExpression visitDataTypeFunctionCall(DataTypeFunctionCallContext ctx) { - return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType())); + return new Cast(visit(ctx.logicalExpression()), visit(ctx.convertedDataType())); } @Override From 17b1d86c3098b1fa11171ffe05fdcaa47fbf53d8 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 21 Oct 2025 21:38:11 -0700 Subject: [PATCH 19/99] updated recommended changes Signed-off-by: Asif Bashar --- .../org/opensearch/sql/ppl/parser/AstExpressionBuilder.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index f7f6b3e914a..f037376f5c2 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -19,7 +19,6 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; -import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.RuleContext; import org.antlr.v4.runtime.tree.ParseTree; @@ -30,10 +29,8 @@ import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.calcite.plan.OpenSearchConstants; -import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BinaryArithmeticContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BooleanLiteralContext; From eca7d139fce7d750e2be988facd8764dd6d1674c Mon Sep 17 00:00:00 2001 From: Asif 
Bashar Date: Tue, 21 Oct 2025 21:38:59 -0700 Subject: [PATCH 20/99] updated recommended changes Signed-off-by: Asif Bashar --- .../opensearch/sql/expression/function/BuiltinFunctionName.java | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index fe862adc322..df3ae06d5c8 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -149,6 +149,7 @@ public enum BuiltinFunctionName { SYSDATE(FunctionName.of("sysdate")), /** Text Functions. */ + TOSTRING(FunctionName.of("tostring")), TONUMBER(FunctionName.of("tonumber")), /** IP Functions. */ From e780c156df19215bc03e4ba45988b32393f6ddcc Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 21 Oct 2025 21:40:50 -0700 Subject: [PATCH 21/99] updated recommended changes Signed-off-by: Asif Bashar --- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 -- 1 file changed, 2 deletions(-) diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index a26b7381797..d386813fc96 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -847,13 +847,11 @@ evalFunctionCall : evalFunctionName LT_PRTHS functionArgs RT_PRTHS ; - // cast function dataTypeFunctionCall : CAST LT_PRTHS logicalExpression AS convertedDataType RT_PRTHS ; - convertedDataType : typeName = DATE | typeName = TIME From e3884b0a5ac00d147268fe964dcf554739685c88 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 21 Oct 2025 21:42:31 -0700 Subject: [PATCH 22/99] updated recommended changes Signed-off-by: Asif Bashar --- .../org/opensearch/sql/expression/function/PPLFuncImpTable.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index d9e594f089f..a5d0d1b8712 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -1062,7 +1062,6 @@ void populate() { SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)), false)); - register( LOG, (FunctionImp2) From 042d3e821a03a2c5a5438f449bfbd983d8e53057 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Wed, 22 Oct 2025 10:11:40 -0700 Subject: [PATCH 23/99] spotless apply Signed-off-by: Asif Bashar --- .../sql/calcite/utils/PPLOperandTypes.java | 10 +- .../expression/function/PPLFuncImpTable.java | 2 +- .../function/udf/ToNumberFunction.java | 179 ++++++++---------- .../function/udf/ToNumberFunctionTest.java | 112 ++++++----- 4 files changed, 153 insertions(+), 150 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index b00fa607d55..c64fa6ef4d5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -106,12 +106,12 @@ private PPLOperandTypes() {} SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER)); public static final UDFOperandMetadata STRING_OR_STRING_INTEGER = - UDFOperandMetadata.wrap( - (CompositeOperandTypeChecker) - 
OperandTypes.family(SqlTypeFamily.CHARACTER).or(OperandTypes.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER))); - + UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + OperandTypes.family(SqlTypeFamily.CHARACTER) + .or(OperandTypes.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER))); - public static final UDFOperandMetadata STRING_STRING_INTEGER_INTEGER = + public static final UDFOperandMetadata STRING_STRING_INTEGER_INTEGER = UDFOperandMetadata.wrap( OperandTypes.family( SqlTypeFamily.CHARACTER, diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index a5d0d1b8712..4339619275c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -211,6 +211,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPDIFF; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIME_FORMAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIME_TO_SEC; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TONUMBER; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TO_DAYS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TO_SECONDS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.TRANSFORM; @@ -232,7 +233,6 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.XOR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.TONUMBER; import com.google.common.collect.ImmutableMap; import java.math.BigDecimal; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 95b37a0107c..0c5b576b297 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -5,12 +5,7 @@ package org.opensearch.sql.expression.function.udf; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.text.NumberFormat; import java.util.List; -import java.util.Locale; - import org.apache.calcite.adapter.enumerable.NotNullImplementor; import org.apache.calcite.adapter.enumerable.NullPolicy; import org.apache.calcite.adapter.enumerable.RexToLixTranslator; @@ -22,7 +17,6 @@ import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.opensearch.sql.calcite.utils.PPLOperandTypes; -import org.opensearch.sql.calcite.utils.PPLReturnTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -36,105 +30,92 @@ * HH:MM:SS. 
if not format parameter provided, then consider value as boolean */ public class ToNumberFunction extends ImplementorUDF { - public ToNumberFunction() { - super( - new org.opensearch.sql.expression.function.udf.ToNumberFunction.ToNumberImplementor(), - NullPolicy.ANY); - } - - public static final String DURATION_FORMAT = "duration"; - public static final String DURATION_MILLIS_FORMAT = "duration_millis"; - public static final String HEX_FORMAT = "hex"; - public static final String COMMAS_FORMAT = "commas"; - public static final String BINARY_FORMAT = "binary"; - public static final SqlFunctions.DateFormatFunction dateTimeFormatter = - new SqlFunctions.DateFormatFunction(); - public static final String format24hour = "%H:%M:%S"; // 24-hour format + public ToNumberFunction() { + super( + new org.opensearch.sql.expression.function.udf.ToNumberFunction.ToNumberImplementor(), + NullPolicy.ANY); + } + + public static final String DURATION_FORMAT = "duration"; + public static final String DURATION_MILLIS_FORMAT = "duration_millis"; + public static final String HEX_FORMAT = "hex"; + public static final String COMMAS_FORMAT = "commas"; + public static final String BINARY_FORMAT = "binary"; + public static final SqlFunctions.DateFormatFunction dateTimeFormatter = + new SqlFunctions.DateFormatFunction(); + public static final String format24hour = "%H:%M:%S"; // 24-hour format + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return ReturnTypes.DOUBLE_FORCE_NULLABLE; + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return PPLOperandTypes.STRING_OR_STRING_INTEGER; + } + + public static class ToNumberImplementor implements NotNullImplementor { @Override - public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.DOUBLE_FORCE_NULLABLE; + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + Expression fieldValue = translatedOperands.get(0); + int base = 10; + if (translatedOperands.size() > 1) { + Expression baseExpr = translatedOperands.get(1); + return Expressions.call(ToNumberFunction.class, "toNumber", fieldValue, baseExpr); + } else { + return Expressions.call(ToNumberFunction.class, "toNumber", fieldValue); + } } + } - @Override - public UDFOperandMetadata getOperandMetadata() { - return PPLOperandTypes.STRING_OR_STRING_INTEGER; - } - - public static class ToNumberImplementor implements NotNullImplementor { - - @Override - public Expression implement( - RexToLixTranslator translator, RexCall call, List translatedOperands) { - Expression fieldValue = translatedOperands.get(0); - int base = 10; - if (translatedOperands.size() > 1) { - Expression baseExpr = translatedOperands.get(1); - return Expressions.call(ToNumberFunction.class, "toNumber", fieldValue, baseExpr); - } else { - return Expressions.call(ToNumberFunction.class, "toNumber", fieldValue); - } - - + @Strict + public static Number toNumber(String numStr) { + return toNumber(numStr, 10); + } - - - } + @Strict + public static Number toNumber(String numStr, int base) { + if (base < 2 || base > 36) { + throw new IllegalArgumentException("Base must be between 2 and 36"); } - @Strict - public static Number toNumber(String numStr) { - return toNumber(numStr, 10); + if (numStr.contains(".")) { + + boolean isNegative = numStr.startsWith("-"); + if (isNegative) { + numStr = numStr.substring(1); + } + + // Split integer and fractional parts + String[] parts = numStr.split("\\."); + String intPart = parts[0]; + String fracPart = 
parts.length > 1 ? parts[1] : ""; + + // Convert integer part + double intValue = 0; + for (char c : intPart.toCharArray()) { + int digit = Character.digit(c, base); + if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); + intValue = intValue * base + digit; + } + + // Convert fractional part + double fracValue = 0; + double divisor = base; + for (char c : fracPart.toCharArray()) { + int digit = Character.digit(c, base); + if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); + fracValue += (double) digit / divisor; + divisor *= base; + } + + double result = intValue + fracValue; + return isNegative ? -result : result; + } else { + return Integer.parseInt(numStr, base); } - - @Strict - public static Number toNumber(String numStr, int base) { - if (base < 2 || base > 36) { - throw new IllegalArgumentException("Base must be between 2 and 36"); - } - - if (numStr.contains(".")) { - - - boolean isNegative = numStr.startsWith("-"); - if (isNegative) { - numStr = numStr.substring(1); - } - - // Split integer and fractional parts - String[] parts = numStr.split("\\."); - String intPart = parts[0]; - String fracPart = parts.length > 1 ? parts[1] : ""; - - // Convert integer part - double intValue = 0; - for (char c : intPart.toCharArray()) { - int digit = Character.digit(c, base); - if (digit < 0) - throw new IllegalArgumentException("Invalid digit: " + c); - intValue = intValue * base + digit; - } - - // Convert fractional part - double fracValue = 0; - double divisor = base; - for (char c : fracPart.toCharArray()) { - int digit = Character.digit(c, base); - if (digit < 0) - throw new IllegalArgumentException("Invalid digit: " + c); - fracValue += (double) digit / divisor; - divisor *= base; - } - - double result = intValue + fracValue; - return isNegative ? 
-result : result; - } else { - return Integer.parseInt(numStr, base); - } - } - + } } - - - - diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java index d7f2b6fd685..7c29f75ba0a 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -7,8 +7,6 @@ import static org.junit.jupiter.api.Assertions.*; -import java.math.BigDecimal; -import java.util.Locale; import org.apache.calcite.sql.type.ReturnTypes; import org.junit.jupiter.api.Test; import org.opensearch.sql.calcite.utils.PPLOperandTypes; @@ -116,55 +114,77 @@ void testToNumberWithZeroIntegerPart() { @Test void testToNumberInvalidBase() { - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("123", 1); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("123", 37); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("123", 0); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("123", -1); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("123", 1); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("123", 37); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("123", 0); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("123", -1); + }); } @Test void testToNumberInvalidDigits() { - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("12A", 10); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("102", 2); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("189", 8); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("GHI", 16); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("12A", 10); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("102", 2); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("189", 8); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("GHI", 16); + }); } @Test void testToNumberInvalidFractionalDigits() { - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("10.2", 2); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("FF.G", 16); - }); - - assertThrows(IllegalArgumentException.class, () -> { - ToNumberFunction.toNumber("123.ABC", 10); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("10.2", 2); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("FF.G", 16); + }); + + assertThrows( + IllegalArgumentException.class, + () -> { + ToNumberFunction.toNumber("123.ABC", 10); + }); } @Test @@ -177,8 +197,10 @@ void testToNumberEdgeCases() { @Test void testToNumberLargeNumbers() { - assertEquals(Integer.MAX_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MAX_VALUE), 10)); - assertEquals(Integer.MIN_VALUE, 
ToNumberFunction.toNumber(String.valueOf(Integer.MIN_VALUE), 10)); } @Test From 58d8e1b5714d9f02f80b910da847741cc309f713 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 28 Oct 2025 10:53:26 -0700 Subject: [PATCH 24/99] merge conflict fix Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 849d2334e41..ffe98228c39 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -117,3 +117,54 @@ Use string in comparison operator example :: | True | False | True | False | True | True | null | +------+-------+------+-------+------+------+------+ + + +TONUMBER +----------- + +Description +>>>>>>>>>>> + +The following usage options are available, depending on the parameter types and the number of parameters. + +Usage: tonumber(string, [base]) converts the string in the first argument to a number, interpreting its digits in the base given by the second argument. If the second argument is not provided, the string is parsed as a base 10 number. + +Return type: Number + + +You can use this function with the eval commands and as part of eval expressions. +Base values can be between 2 and 36. + +You can use this function to convert a string representation of a binary number to return the corresponding number in base 10. + +Following example converts a string in binary to the number representation:: + + os> source=EMP | eval int_value = tonumber('010101',2) | fields int_value | head 1 + fetched rows / total rows = 1/1 + +---------------+ + | int_value | + |---------------+ + | 21.0 | + +---------------+ + + +Following example converts a string in hex to the number representation:: + + + os> source=EMP | eval int_value = tonumber('FA34',16) | fields int_value | head 1 + fetched rows / total rows = 1/1 + +---------------+ + | int_value | + |---------------+ + | 64052.0 | + +---------------+ + +Following example converts a string in decimal to the number representation:: + + os> source=EMP | eval int_value = tonumber('4598') | fields int_value | head 1 + fetched rows / total rows = 1/1 + +---------------+ + | int_value | + |---------------+ + | 4598.0 | + +---------------+ From 34f86355d4da09a88b761ac3e1190f7a609b1156 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Sat, 1 Nov 2025 22:48:31 -0700 Subject: [PATCH 25/99] added recommended changes Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 24 ++-- .../function/udf/ToNumberFunctionTest.java | 105 +++++++----------- docs/user/ppl/functions/conversion.rst | 6 +- .../CalcitePPLToNumberFunctionTest.java | 71 ++++++++++++ 4 files changed, 133 insertions(+), 73 deletions(-) create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 38f1734a8aa..479cbefaf87 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -104,18 +104,26 @@ public static Number toNumber(String numStr, int 
base) { // Convert fractional part double fracValue = 0; - double divisor = base; - for (char c : fracPart.toCharArray()) { - int digit = Character.digit(c, base); - if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); - fracValue += (double) digit / divisor; - divisor *= base; + if (base == 10) { + double divisor = base; + for (char c : fracPart.toCharArray()) { + int digit = Character.digit(c, base); + if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); + fracValue += (double) digit / divisor; + divisor *= base; + } } double result = intValue + fracValue; - return isNegative ? -result : result; + result = isNegative ? -result : result; + if (base == 10) { + return result; + } + else { + return (long) result; + } } else { - return Integer.parseInt(numStr, base); + return Long.parseLong(numStr, base); } } } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java index 7c29f75ba0a..3a8c3185240 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -27,9 +27,9 @@ void testGetOperandMetadata() { @Test void testToNumberWithDefaultBase() { - assertEquals(123, ToNumberFunction.toNumber("123")); - assertEquals(0, ToNumberFunction.toNumber("0")); - assertEquals(-456, ToNumberFunction.toNumber("-456")); + assertEquals(123L, ToNumberFunction.toNumber("123")); + assertEquals(0L, ToNumberFunction.toNumber("0")); + assertEquals(-456L, ToNumberFunction.toNumber("-456")); assertEquals(123.45, ToNumberFunction.toNumber("123.45")); assertEquals(-123.45, ToNumberFunction.toNumber("-123.45")); assertEquals(0.5, ToNumberFunction.toNumber("0.5")); @@ -38,78 +38,78 @@ void testToNumberWithDefaultBase() { @Test void testToNumberWithBase10() { - assertEquals(123, ToNumberFunction.toNumber("123", 10)); - assertEquals(0, ToNumberFunction.toNumber("0", 10)); - assertEquals(-456, ToNumberFunction.toNumber("-456", 10)); + assertEquals(123L, ToNumberFunction.toNumber("123", 10)); + assertEquals(0L, ToNumberFunction.toNumber("0", 10)); + assertEquals(-456L, ToNumberFunction.toNumber("-456", 10)); assertEquals(123.45, ToNumberFunction.toNumber("123.45", 10)); assertEquals(-123.45, ToNumberFunction.toNumber("-123.45", 10)); } @Test void testToNumberWithBase2() { - assertEquals(5, ToNumberFunction.toNumber("101", 2)); - assertEquals(0, ToNumberFunction.toNumber("0", 2)); - assertEquals(1, ToNumberFunction.toNumber("1", 2)); - assertEquals(7, ToNumberFunction.toNumber("111", 2)); - assertEquals(10, ToNumberFunction.toNumber("1010", 2)); + assertEquals(5L, ToNumberFunction.toNumber("101", 2)); + assertEquals(0L, ToNumberFunction.toNumber("0", 2)); + assertEquals(1L, ToNumberFunction.toNumber("1", 2)); + assertEquals(7L, ToNumberFunction.toNumber("111", 2)); + assertEquals(10L, ToNumberFunction.toNumber("1010", 2)); } @Test void testToNumberWithBase8() { - assertEquals(64, ToNumberFunction.toNumber("100", 8)); - assertEquals(8, ToNumberFunction.toNumber("10", 8)); - assertEquals(83, ToNumberFunction.toNumber("123", 8)); - assertEquals(511, ToNumberFunction.toNumber("777", 8)); + assertEquals(64L, ToNumberFunction.toNumber("100", 8)); + assertEquals(8L, ToNumberFunction.toNumber("10", 8)); + assertEquals(83L, ToNumberFunction.toNumber("123", 8)); + assertEquals(511L, ToNumberFunction.toNumber("777", 8)); } @Test 
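// Positional-notation check for the expected values above and below, e.g.:
//   "777" in base 8:  7*8^2 + 7*8 + 7 = 448 + 56 + 7 = 511
//   "FF"  in base 16: 15*16 + 15 = 255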
void testToNumberWithBase16() { - assertEquals(255, ToNumberFunction.toNumber("FF", 16)); - assertEquals(16, ToNumberFunction.toNumber("10", 16)); - assertEquals(171, ToNumberFunction.toNumber("AB", 16)); - assertEquals(291, ToNumberFunction.toNumber("123", 16)); - assertEquals(4095, ToNumberFunction.toNumber("FFF", 16)); + assertEquals(255L, ToNumberFunction.toNumber("FF", 16)); + assertEquals(16L, ToNumberFunction.toNumber("10", 16)); + assertEquals(171L, ToNumberFunction.toNumber("AB", 16)); + assertEquals(291L, ToNumberFunction.toNumber("123", 16)); + assertEquals(4095L, ToNumberFunction.toNumber("FFF", 16)); } @Test void testToNumberWithBase36() { - assertEquals(35, ToNumberFunction.toNumber("Z", 36)); - assertEquals(1295, ToNumberFunction.toNumber("ZZ", 36)); - assertEquals(46655, ToNumberFunction.toNumber("ZZZ", 36)); + assertEquals(35L, ToNumberFunction.toNumber("Z", 36)); + assertEquals(1295L, ToNumberFunction.toNumber("ZZ", 36)); + assertEquals(46655L, ToNumberFunction.toNumber("ZZZ", 36)); } @Test void testToNumberWithDecimalBase2() { - assertEquals(2.5, ToNumberFunction.toNumber("10.1", 2)); - assertEquals(1.5, ToNumberFunction.toNumber("1.1", 2)); - assertEquals(3.75, ToNumberFunction.toNumber("11.11", 2)); + assertEquals(2L, ToNumberFunction.toNumber("10.1", 2)); + assertEquals(1L, ToNumberFunction.toNumber("1.1", 2)); + assertEquals(3L, ToNumberFunction.toNumber("11.11", 2)); } @Test void testToNumberWithDecimalBase16() { - assertEquals(255.5, ToNumberFunction.toNumber("FF.8", 16)); - assertEquals(16.25, ToNumberFunction.toNumber("10.4", 16)); - assertEquals(171.6875, ToNumberFunction.toNumber("AB.B", 16)); + assertEquals(255L, ToNumberFunction.toNumber("FF.8", 16)); + assertEquals(16L, ToNumberFunction.toNumber("10.4", 16)); + assertEquals(171L, ToNumberFunction.toNumber("AB.B", 16)); } @Test void testToNumberWithNegativeDecimal() { - assertEquals(-2.5, ToNumberFunction.toNumber("-10.1", 2)); - assertEquals(-255.5, ToNumberFunction.toNumber("-FF.8", 16)); + assertEquals(-2L, ToNumberFunction.toNumber("-10.1", 2)); + assertEquals(-255L, ToNumberFunction.toNumber("-FF.8", 16)); assertEquals(-123.45, ToNumberFunction.toNumber("-123.45", 10)); } @Test void testToNumberWithEmptyFractionalPart() { assertEquals(123.0, ToNumberFunction.toNumber("123.", 10)); - assertEquals(255.0, ToNumberFunction.toNumber("FF.", 16)); - assertEquals(5.0, ToNumberFunction.toNumber("101.", 2)); + assertEquals(255L, ToNumberFunction.toNumber("FF.", 16)); + assertEquals(5L, ToNumberFunction.toNumber("101.", 2)); } @Test void testToNumberWithZeroIntegerPart() { assertEquals(0.5, ToNumberFunction.toNumber("0.5", 10)); - assertEquals(0.5, ToNumberFunction.toNumber("0.1", 2)); + assertEquals(0L, ToNumberFunction.toNumber("0.1", 2)); } @Test @@ -166,31 +166,12 @@ void testToNumberInvalidDigits() { }); } - @Test - void testToNumberInvalidFractionalDigits() { - assertThrows( - IllegalArgumentException.class, - () -> { - ToNumberFunction.toNumber("10.2", 2); - }); - assertThrows( - IllegalArgumentException.class, - () -> { - ToNumberFunction.toNumber("FF.G", 16); - }); - - assertThrows( - IllegalArgumentException.class, - () -> { - ToNumberFunction.toNumber("123.ABC", 10); - }); - } @Test void testToNumberEdgeCases() { - assertEquals(0, ToNumberFunction.toNumber("0", 2)); - assertEquals(0, ToNumberFunction.toNumber("0", 36)); + assertEquals(0L, ToNumberFunction.toNumber("0", 2)); + assertEquals(0L, ToNumberFunction.toNumber("0", 36)); assertEquals(0.0, ToNumberFunction.toNumber("0.0", 10)); assertEquals(0.0, 
ToNumberFunction.toNumber("0.000", 10)); } @@ -198,17 +179,17 @@ void testToNumberEdgeCases() { @Test void testToNumberLargeNumbers() { assertEquals( - Integer.MAX_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MAX_VALUE), 10)); + (long) Integer.MAX_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MAX_VALUE), 10)); assertEquals( - Integer.MIN_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MIN_VALUE), 10)); + (long) Integer.MIN_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MIN_VALUE), 10)); } @Test void testToNumberCaseInsensitivity() { - assertEquals(255, ToNumberFunction.toNumber("ff", 16)); - assertEquals(255, ToNumberFunction.toNumber("FF", 16)); - assertEquals(255, ToNumberFunction.toNumber("fF", 16)); - assertEquals(171, ToNumberFunction.toNumber("ab", 16)); - assertEquals(171, ToNumberFunction.toNumber("AB", 16)); + assertEquals(255L, ToNumberFunction.toNumber("ff", 16)); + assertEquals(255L, ToNumberFunction.toNumber("FF", 16)); + assertEquals(255L, ToNumberFunction.toNumber("fF", 16)); + assertEquals(171L, ToNumberFunction.toNumber("ab", 16)); + assertEquals(171L, ToNumberFunction.toNumber("AB", 16)); } } diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index ffe98228c39..0176c6a6003 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -139,7 +139,7 @@ You can use this function to convert a string representation of a binary number Following example converts a string in binary to the number representation:: - os> source=EMP | eval int_value = tonumber('010101',2) | fields int_value | head 1 + os> source=people | eval int_value = tonumber('010101',2) | fields int_value | head 1 fetched rows / total rows = 1/1 +---------------+ | int_value | @@ -151,7 +151,7 @@ Following example converts a string in binary to the number representation:: Following example converts a string in hex to the number representation:: - os> source=EMP | eval int_value = tonumber('FA34',16) | fields int_value | head 1 + os> source=people | eval int_value = tonumber('FA34',16) | fields int_value | head 1 fetched rows / total rows = 1/1 +---------------+ | int_value | @@ -161,7 +161,7 @@ Following example converts a string in hex to the number representation:: Following example converts a string in decimal to the number representation:: - os> source=EMP | eval int_value = tonumber('4598') | fields int_value | head 1 + os> source=people | eval int_value = tonumber('4598') | fields int_value | head 1 fetched rows / total rows = 1/1 +---------------+ | int_value | diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java new file mode 100644 index 00000000000..885812a9003 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -0,0 +1,71 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLToNumberFunctionTest extends CalcitePPLAbstractTest { + + public CalcitePPLToNumberFunctionTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testNumberBinary() { + String ppl = "source=EMP | eval int_value = tonumber('010101',2) | fields int_value|head 1"; + RelNode root = 
getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('010101':VARCHAR, 2)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=21.0\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT `TONUMBER`('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + + @Test + public void testNumberHex() { + String ppl = "source=EMP | eval int_value = tonumber('FA34',16) | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('FA34':VARCHAR, 16)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=64052.0\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT `TONUMBER`('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNumber() { + String ppl = "source=EMP | eval int_value = tonumber('4598') | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('4598':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=4598.0\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT `TONUMBER`('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNumberDecimal() { + String ppl = "source=EMP | eval int_value = tonumber('4598.54922') | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('4598.54922':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=4598.54922\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = "SELECT `TONUMBER`('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} From 8279bf3c18889272ee8ac0266a5fdeaa3c4ab5b2 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 4 Nov 2025 18:13:10 -0800 Subject: [PATCH 26/99] fixed doctest for conversion.rst and applied spotless Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 23 +++-- .../function/udf/ToNumberFunctionTest.java | 6 +- docs/user/ppl/functions/conversion.rst | 78 ++++++++--------- .../CalcitePPLToNumberFunctionTest.java | 85 +++++++++++-------- 4 files changed, 101 insertions(+), 91 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 38f1734a8aa..479cbefaf87 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -105,22 +105,21 @@ public static Number toNumber(String numStr, int base) { // Convert fractional part double fracValue = 0; if (base == 10) { - double divisor = base; - for (char c : fracPart.toCharArray()) { - int digit = Character.digit(c, base); - if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); - fracValue += (double) digit / 
divisor; - divisor *= base; - } + double divisor = base; + for (char c : fracPart.toCharArray()) { + int digit = Character.digit(c, base); + if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); + fracValue += (double) digit / divisor; + divisor *= base; + } } double result = intValue + fracValue; - result = isNegative ? -result : result; + result = isNegative ? -result : result; if (base == 10) { - return result; - } - else { - return (long) result; + return result; + } else { + return (long) result; } } else { return Long.parseLong(numStr, base); diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java index 3a8c3185240..cd976956408 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -166,8 +166,6 @@ void testToNumberInvalidDigits() { }); } - - @Test void testToNumberEdgeCases() { assertEquals(0L, ToNumberFunction.toNumber("0", 2)); @@ -179,9 +177,9 @@ void testToNumberEdgeCases() { @Test void testToNumberLargeNumbers() { assertEquals( - (long) Integer.MAX_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MAX_VALUE), 10)); + (long) Integer.MAX_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MAX_VALUE), 10)); assertEquals( - (long) Integer.MIN_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MIN_VALUE), 10)); + (long) Integer.MIN_VALUE, ToNumberFunction.toNumber(String.valueOf(Integer.MIN_VALUE), 10)); } @Test diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 0176c6a6003..fd34681b03f 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -43,21 +43,21 @@ Cast to string example:: os> source=people | eval `cbool` = CAST(true as string), `cint` = CAST(1 as string), `cdate` = CAST(CAST('2012-08-07' as date) as string) | fields `cbool`, `cint`, `cdate` fetched rows / total rows = 1/1 - +-------+------+------------+ - | cbool | cint | cdate | - |-------+------+------------| - | TRUE | 1 | 2012-08-07 | - +-------+------+------------+ + +---------+--------+------------+ + | cbool | cint | cdate | + |---------+--------+------------| + | TRUE | 1 | 2012-08-07 | + +---------+--------+------------+ Cast to number example:: os> source=people | eval `cbool` = CAST(true as int), `cstring` = CAST('1' as int) | fields `cbool`, `cstring` fetched rows / total rows = 1/1 - +-------+---------+ - | cbool | cstring | - |-------+---------| - | 1 | 1 | - +-------+---------+ + +---------+-----------+ + | cbool | cstring | + |---------+-----------| + | 1 | 1 | + +---------+-----------+ Cast to date example:: @@ -73,11 +73,11 @@ Cast function can be chained:: os> source=people | eval `cbool` = CAST(CAST(true as string) as boolean) | fields `cbool` fetched rows / total rows = 1/1 - +-------+ - | cbool | - |-------| - | True | - +-------+ + +---------+ + | cbool | + |---------| + | True | + +---------+ IMPLICIT (AUTO) TYPE CONVERSION @@ -101,11 +101,11 @@ Use string in arithmetic operator example :: os> source=people | eval divide="5"/10, multiply="5" * 10, add="5" + 10, minus="5" - 10, concat="5" + "5" | fields divide, multiply, add, minus, concat fetched rows / total rows = 1/1 - +--------+----------+------+-------+--------+ - | divide | multiply | add | minus | concat | - 
|--------+----------+------+-------+--------| - | 0.5 | 50.0 | 15.0 | -5.0 | 55 | - +--------+----------+------+-------+--------+ + +----------+------------+-------+---------+----------+ + | divide | multiply | add | minus | concat | + |----------+------------+-------+---------+----------| + | 0.5 | 50.0 | 15.0 | -5.0 | 55 | + +----------+------------+-------+---------+----------+ Use string in comparison operator example :: @@ -120,7 +120,7 @@ Use string in comparison operator example :: TONUMBER ------------ +-------- Description >>>>>>>>>>> @@ -141,30 +141,30 @@ Following example converts a string in binary to the number representation:: os> source=people | eval int_value = tonumber('010101',2) | fields int_value | head 1 fetched rows / total rows = 1/1 - +---------------+ - | int_value | - |---------------+ - | 21.0 | - +---------------+ + +-------------+ + | int_value | + |-------------| + | 21.0 | + +-------------+ Following example converts a string in hex to the number representation:: - os> source=people | eval int_value = tonumber('FA34',16) | fields int_value | head 1 fetched rows / total rows = 1/1 - +---------------+ - | int_value | - |---------------+ - | 64052.0 | - +---------------+ + +-------------+ + | int_value | + |-------------| + | 64052.0 | + +-------------+ Following example converts a string in decimal to the number representation:: - os> source=people | eval int_value = tonumber('4598') | fields int_value | head 1 - fetched rows / total rows = 1/1 - +---------------+ - | int_value | - |---------------+ - | 4598.0 | - +---------------+ + os> source=people | eval int_value = tonumber('4598') | fields int_value | head 1 + fetched rows / total rows = 1/1 + +-------------+ + | int_value | + |-------------| + | 4598.0 | + +-------------+ + diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java index 885812a9003..37053e59f51 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -20,52 +20,65 @@ public void testNumberBinary() { String ppl = "source=EMP | eval int_value = tonumber('010101',2) | fields int_value|head 1"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('010101':VARCHAR, 2)])\n LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(fetch=[1])\n" + + " LogicalProject(int_value=[TONUMBER('010101':VARCHAR, 2)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedResult = "int_value=21.0\n"; verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT `TONUMBER`('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + String expectedSparkSql = + "SELECT `TONUMBER`('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testNumberHex() { + String ppl = "source=EMP | eval int_value = tonumber('FA34',16) | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(int_value=[TONUMBER('FA34':VARCHAR, 16)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=64052.0\n"; + verifyResult(root, expectedResult); - @Test - public void testNumberHex() { - 
String ppl = "source=EMP | eval int_value = tonumber('FA34',16) | fields int_value|head 1"; - RelNode root = getRelNode(ppl); - String expectedLogical = "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('FA34':VARCHAR, 16)])\n LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - String expectedResult = "int_value=64052.0\n"; - verifyResult(root, expectedResult); - - String expectedSparkSql = "SELECT `TONUMBER`('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; - verifyPPLToSparkSQL(root, expectedSparkSql); - } + String expectedSparkSql = + "SELECT `TONUMBER`('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } - @Test - public void testNumber() { - String ppl = "source=EMP | eval int_value = tonumber('4598') | fields int_value|head 1"; - RelNode root = getRelNode(ppl); - String expectedLogical = "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('4598':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - String expectedResult = "int_value=4598.0\n"; - verifyResult(root, expectedResult); + @Test + public void testNumber() { + String ppl = "source=EMP | eval int_value = tonumber('4598') | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(int_value=[TONUMBER('4598':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=4598.0\n"; + verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT `TONUMBER`('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; - verifyPPLToSparkSQL(root, expectedSparkSql); - } + String expectedSparkSql = "SELECT `TONUMBER`('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } - @Test - public void testNumberDecimal() { - String ppl = "source=EMP | eval int_value = tonumber('4598.54922') | fields int_value|head 1"; - RelNode root = getRelNode(ppl); - String expectedLogical = "LogicalSort(fetch=[1])\n LogicalProject(int_value=[TONUMBER('4598.54922':VARCHAR)])\n LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - String expectedResult = "int_value=4598.54922\n"; - verifyResult(root, expectedResult); + @Test + public void testNumberDecimal() { + String ppl = "source=EMP | eval int_value = tonumber('4598.54922') | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(int_value=[TONUMBER('4598.54922':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=4598.54922\n"; + verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT `TONUMBER`('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; - verifyPPLToSparkSQL(root, expectedSparkSql); - } + String expectedSparkSql = + "SELECT `TONUMBER`('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } From b5346a6dcfb385f040cc103f8ad480678dc0156a Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 4 Nov 2025 18:20:04 -0800 Subject: [PATCH 27/99] removed decimal point from hex Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunctionTest.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git 
a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java index cd976956408..d457a8a36a9 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -80,36 +80,36 @@ void testToNumberWithBase36() { @Test void testToNumberWithDecimalBase2() { - assertEquals(2L, ToNumberFunction.toNumber("10.1", 2)); - assertEquals(1L, ToNumberFunction.toNumber("1.1", 2)); - assertEquals(3L, ToNumberFunction.toNumber("11.11", 2)); + assertEquals(2L, ToNumberFunction.toNumber("10", 2)); + assertEquals(1L, ToNumberFunction.toNumber("1", 2)); + assertEquals(3L, ToNumberFunction.toNumber("11", 2)); } @Test void testToNumberWithDecimalBase16() { - assertEquals(255L, ToNumberFunction.toNumber("FF.8", 16)); - assertEquals(16L, ToNumberFunction.toNumber("10.4", 16)); - assertEquals(171L, ToNumberFunction.toNumber("AB.B", 16)); + assertEquals(255L, ToNumberFunction.toNumber("FF", 16)); + assertEquals(16L, ToNumberFunction.toNumber("10", 16)); + assertEquals(171L, ToNumberFunction.toNumber("AB", 16)); } @Test void testToNumberWithNegativeDecimal() { - assertEquals(-2L, ToNumberFunction.toNumber("-10.1", 2)); - assertEquals(-255L, ToNumberFunction.toNumber("-FF.8", 16)); + assertEquals(-2L, ToNumberFunction.toNumber("-10", 2)); + assertEquals(-255L, ToNumberFunction.toNumber("-FF", 16)); assertEquals(-123.45, ToNumberFunction.toNumber("-123.45", 10)); } @Test void testToNumberWithEmptyFractionalPart() { assertEquals(123.0, ToNumberFunction.toNumber("123.", 10)); - assertEquals(255L, ToNumberFunction.toNumber("FF.", 16)); - assertEquals(5L, ToNumberFunction.toNumber("101.", 2)); + assertEquals(255L, ToNumberFunction.toNumber("FF", 16)); + assertEquals(5L, ToNumberFunction.toNumber("101", 2)); } @Test void testToNumberWithZeroIntegerPart() { assertEquals(0.5, ToNumberFunction.toNumber("0.5", 10)); - assertEquals(0L, ToNumberFunction.toNumber("0.1", 2)); + assertEquals(0L, ToNumberFunction.toNumber("0", 2)); } @Test From 7cf867b4fa7a1e170df483c3b84e992b62c771a0 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Wed, 5 Nov 2025 09:56:12 -0800 Subject: [PATCH 28/99] added doctest fixes Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 70 +++++++++++++------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index fd34681b03f..52eb04b8e63 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -43,21 +43,21 @@ Cast to string example:: os> source=people | eval `cbool` = CAST(true as string), `cint` = CAST(1 as string), `cdate` = CAST(CAST('2012-08-07' as date) as string) | fields `cbool`, `cint`, `cdate` fetched rows / total rows = 1/1 - +---------+--------+------------+ - | cbool | cint | cdate | - |---------+--------+------------| - | TRUE | 1 | 2012-08-07 | - +---------+--------+------------+ + +-------+------+------------+ + | cbool | cint | cdate | + |-------+------+------------| + | TRUE | 1 | 2012-08-07 | + +-------+------+------------+ Cast to number example:: os> source=people | eval `cbool` = CAST(true as int), `cstring` = CAST('1' as int) | fields `cbool`, `cstring` fetched rows / total rows = 1/1 - +---------+-----------+ - | cbool | cstring | - |---------+-----------| - | 1 | 1 | - 
+---------+-----------+ + +-------+---------+ + | cbool | cstring | + |-------+---------| + | 1 | 1 | + +-------+---------+ Cast to date example:: @@ -73,11 +73,11 @@ Cast function can be chained:: os> source=people | eval `cbool` = CAST(CAST(true as string) as boolean) | fields `cbool` fetched rows / total rows = 1/1 - +---------+ - | cbool | - |---------| - | True | - +---------+ + +-------+ + | cbool | + |-------| + | True | + +-------+ IMPLICIT (AUTO) TYPE CONVERSION @@ -101,11 +101,11 @@ Use string in arithmetic operator example :: os> source=people | eval divide="5"/10, multiply="5" * 10, add="5" + 10, minus="5" - 10, concat="5" + "5" | fields divide, multiply, add, minus, concat fetched rows / total rows = 1/1 - +----------+------------+-------+---------+----------+ - | divide | multiply | add | minus | concat | - |----------+------------+-------+---------+----------| - | 0.5 | 50.0 | 15.0 | -5.0 | 55 | - +----------+------------+-------+---------+----------+ + +--------+----------+------+-------+--------+ + | divide | multiply | add | minus | concat | + |--------+----------+------+-------+--------| + | 0.5 | 50.0 | 15.0 | -5.0 | 55 | + +--------+----------+------+-------+--------+ Use string in comparison operator example :: @@ -141,30 +141,30 @@ Following example converts a string in binary to the number representation:: os> source=people | eval int_value = tonumber('010101',2) | fields int_value | head 1 fetched rows / total rows = 1/1 - +-------------+ - | int_value | - |-------------| - | 21.0 | - +-------------+ + +-----------+ + | int_value | + |-----------| + | 21.0 | + +-----------+ Following example converts a string in hex to the number representation:: os> source=people | eval int_value = tonumber('FA34',16) | fields int_value | head 1 fetched rows / total rows = 1/1 - +-------------+ - | int_value | - |-------------| - | 64052.0 | - +-------------+ + +-----------+ + | int_value | + |-----------| + | 64052.0 | + +-----------+ Following example converts a string in decimal to the number representation:: os> source=people | eval int_value = tonumber('4598') | fields int_value | head 1 fetched rows / total rows = 1/1 - +-------------+ - | int_value | - |-------------| - | 4598.0 | - +-------------+ + +-----------+ + | int_value | + |-----------| + | 4598.0 | + +-----------+ From 46f010bb62eca84723683347d56cf31323f9ff5b Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Wed, 5 Nov 2025 23:11:47 -0800 Subject: [PATCH 29/99] removed unused variables Signed-off-by: Asif Bashar --- .../sql/expression/function/udf/ToNumberFunction.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 479cbefaf87..ba30a7a7d1d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -36,14 +36,7 @@ public ToNumberFunction() { NullPolicy.ANY); } - public static final String DURATION_FORMAT = "duration"; - public static final String DURATION_MILLIS_FORMAT = "duration_millis"; - public static final String HEX_FORMAT = "hex"; - public static final String COMMAS_FORMAT = "commas"; - public static final String BINARY_FORMAT = "binary"; - public static final SqlFunctions.DateFormatFunction dateTimeFormatter = - new SqlFunctions.DateFormatFunction(); - public static final String 
format24hour = "%H:%M:%S"; // 24-hour format + @Override public SqlReturnTypeInference getReturnTypeInference() { From b7afa1713253d1adc49d343467cdda50d98869a4 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 6 Nov 2025 06:36:34 -0800 Subject: [PATCH 30/99] spotless Signed-off-by: Asif Bashar --- .../sql/expression/function/udf/ToNumberFunction.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index ba30a7a7d1d..5b3a49fe549 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -13,7 +13,6 @@ import org.apache.calcite.linq4j.tree.Expression; import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.rex.RexCall; -import org.apache.calcite.runtime.SqlFunctions; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.opensearch.sql.calcite.utils.PPLOperandTypes; @@ -36,8 +35,6 @@ public ToNumberFunction() { NullPolicy.ANY); } - - @Override public SqlReturnTypeInference getReturnTypeInference() { return ReturnTypes.DOUBLE_FORCE_NULLABLE; From 0f0125a127778b29f74f69aeb16119d9d3716074 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 6 Nov 2025 10:24:54 -0800 Subject: [PATCH 31/99] made recommended changes to simply parse to number, returning null when malformed Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 52 +++++-------------- .../function/udf/ToNumberFunctionTest.java | 29 +++-------- 2 files changed, 19 insertions(+), 62 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 5b3a49fe549..2557121b0c8 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -5,6 +5,7 @@ package org.opensearch.sql.expression.function.udf; +import java.math.BigInteger; import java.util.List; import org.apache.calcite.adapter.enumerable.NotNullImplementor; import org.apache.calcite.adapter.enumerable.NullPolicy; @@ -69,50 +70,23 @@ public static Number toNumber(String numStr) { @Strict public static Number toNumber(String numStr, int base) { if (base < 2 || base > 36) { - throw new IllegalArgumentException("Base must be between 2 and 36"); + throw new IllegalArgumentException("Base has to be between 2 and 36."); } - - if (numStr.contains(".")) { - - boolean isNegative = numStr.startsWith("-"); - if (isNegative) { - numStr = numStr.substring(1); - } - - // Split integer and fractional parts - String[] parts = numStr.split("\\."); - String intPart = parts[0]; - String fracPart = parts.length > 1 ? 
parts[1] : ""; - - // Convert integer part - double intValue = 0; - for (char c : intPart.toCharArray()) { - int digit = Character.digit(c, base); - if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); - intValue = intValue * base + digit; - } - - // Convert fractional part - double fracValue = 0; + Number result = null; + try { if (base == 10) { - double divisor = base; - for (char c : fracPart.toCharArray()) { - int digit = Character.digit(c, base); - if (digit < 0) throw new IllegalArgumentException("Invalid digit: " + c); - fracValue += (double) digit / divisor; - divisor *= base; + if (numStr.contains(".")) { + result = Double.parseDouble(numStr); + } else { + result = Long.parseLong(numStr); } - } - - double result = intValue + fracValue; - result = isNegative ? -result : result; - if (base == 10) { - return result; } else { - return (long) result; + BigInteger bigInteger = new BigInteger(numStr, base); + result = bigInteger.longValue(); } - } else { - return Long.parseLong(numStr, base); + } catch (Exception e) { + } + return result; } } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java index d457a8a36a9..34ed102ec5b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -141,29 +141,12 @@ void testToNumberInvalidBase() { @Test void testToNumberInvalidDigits() { - assertThrows( - IllegalArgumentException.class, - () -> { - ToNumberFunction.toNumber("12A", 10); - }); - - assertThrows( - IllegalArgumentException.class, - () -> { - ToNumberFunction.toNumber("102", 2); - }); - - assertThrows( - IllegalArgumentException.class, - () -> { - ToNumberFunction.toNumber("189", 8); - }); - - assertThrows( - IllegalArgumentException.class, - () -> { - ToNumberFunction.toNumber("GHI", 16); - }); + assertEquals(null, ToNumberFunction.toNumber("12A", 10)); + assertEquals(null, ToNumberFunction.toNumber("102", 2)); + assertEquals(null, ToNumberFunction.toNumber("101.101", 2)); + assertEquals(null, ToNumberFunction.toNumber("189", 8)); + assertEquals(null, ToNumberFunction.toNumber("GHI", 16)); + assertEquals(null, ToNumberFunction.toNumber("FF.8", 16)); } @Test From 804db79a7ad546937cece7a99eea0e84af32c66a Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Sat, 8 Nov 2025 16:51:14 -0800 Subject: [PATCH 32/99] hex max limit doc and unit tests Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 3 +- .../CalcitePPLToNumberFunctionTest.java | 80 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index ca9297b1f0d..c55e868f2cd 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -133,7 +133,8 @@ Return type: Number You can use this function with the eval commands and as part of eval expressions. -Base values can be between 2 and 36. +Base values can be between 2 and 36. The maximum value supported for base 10 is +(2-2^-52)·2^1023 and minimum is -(2-2^-52)·2^1023. +The maximum for other supported bases is 2^63-1 (or 7FFFFFFFFFFFFFFF) and minimum is -2^63 (or -7FFFFFFFFFFFFFFF). You can use this function to convert a string representation of a binary number to return the corresponding number in base 10. 
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java index 37053e59f51..448cddeefec 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -49,6 +49,86 @@ public void testNumberHex() { verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testNumberHexMinLimit() { + String ppl = + "source=EMP | eval long_value = tonumber('-7FFFFFFFFFFFFFFF',16) | fields long_value|head" + + " 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(long_value=[TONUMBER('-7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "long_value=-9.223372036854776E18\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNumberHexMaxLimit() { + String ppl = + "source=EMP | eval long_value = tonumber('7FFFFFFFFFFFFFFF',16) | fields long_value|head" + + " 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(long_value=[TONUMBER('7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "long_value=9.223372036854776E18\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNumberHexOverNegativeMaxLimit() { + String ppl = + "source=EMP | eval long_value = tonumber('-FFFFFFFFFFFFFFFF',16) | fields long_value|head" + + " 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(long_value=[TONUMBER('-FFFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "long_value=1.0\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('-FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testNumberHexOverPositiveMaxLimit() { + String ppl = + "source=EMP | eval long_value = tonumber('FFFFFFFFFFFFFFFF',16) | fields long_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(long_value=[TONUMBER('FFFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "long_value=-1.0\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testNumber() { String ppl = "source=EMP | eval int_value = tonumber('4598') | fields int_value|head 1"; From 75883979380e8cad8b0f4e2318ecaf748fd6d0aa Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Sun, 9 Nov 2025 10:51:54 -0800 Subject: [PATCH 
33/99] fix to spotless Signed-off-by: Asif Bashar --- .../CalcitePPLToNumberFunctionTest.java | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java index 448cddeefec..bd4f2c293b6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -49,26 +49,25 @@ public void testNumberHex() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testNumberHexMinLimit() { + String ppl = + "source=EMP | eval long_value = tonumber('-7FFFFFFFFFFFFFFF',16) | fields long_value|head" + + " 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(long_value=[TONUMBER('-7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "long_value=-9.223372036854776E18\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; - @Test - public void testNumberHexMinLimit() { - String ppl = - "source=EMP | eval long_value = tonumber('-7FFFFFFFFFFFFFFF',16) | fields long_value|head" - + " 1"; - RelNode root = getRelNode(ppl); - String expectedLogical = - "LogicalSort(fetch=[1])\n" - + " LogicalProject(long_value=[TONUMBER('-7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; - verifyLogical(root, expectedLogical); - String expectedResult = "long_value=-9.223372036854776E18\n"; - verifyResult(root, expectedResult); - - String expectedSparkSql = - "SELECT `TONUMBER`('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; - - verifyPPLToSparkSQL(root, expectedSparkSql); - } + verifyPPLToSparkSQL(root, expectedSparkSql); + } @Test public void testNumberHexMaxLimit() { From 016f8f8c1f2455beb71b3c851d55b52a8c658fbf Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 6 Nov 2025 16:18:34 +0800 Subject: [PATCH 34/99] Support script pushdown in sort-on-measure pushdown rewriting (#4749) * Support script pushdown in sort-on-measure pushdown rewriting Signed-off-by: Lantao Jin * fix IT Signed-off-by: Lantao Jin * Merge remote-tracking branch 'upstream/main' into issues/4738 * remove system.out.print Signed-off-by: Lantao Jin * Fix IT Signed-off-by: Lantao Jin --- .../sql/common/grok/GrokCompiler.java | 1 - .../sql/common/grok/ApacheDataTypeTest.java | 1 - .../sql/common/grok/ApacheTest.java | 2 - .../opensearch/sql/common/grok/GrokTest.java | 1 - .../sql/common/grok/MessagesTest.java | 1 - .../sql/calcite/utils/BinUtilsTest.java | 8 - .../sql/calcite/remote/CalciteExplainIT.java | 23 ++ .../remote/CalciteFillNullCommandIT.java | 1 - .../remote/CalciteMVAppendFunctionIT.java | 2 - .../calcite/remote/CalcitePPLPatternsIT.java | 1 - .../standalone/JsonExtractAllFunctionIT.java | 10 +- .../org/opensearch/sql/legacy/CursorIT.java | 1 - .../sql/legacy/TermQueryExplainIT.java | 1 - .../opensearch/sql/ppl/StatsCommandIT.java | 368 ++++++++++++++++++ .../calcite/clickbench/q12.yaml | 2 +- .../calcite/clickbench/q15.yaml | 2 +- .../calcite/clickbench/q17.yaml | 2 +- .../calcite/clickbench/q19.yaml | 5 +- .../calcite/clickbench/q40.yaml | 8 +- .../calcite/clickbench/q41.yaml | 2 +- 
.../calcite/clickbench/q42.yaml | 2 +- ...plain_agg_sort_on_measure_multi_terms.yaml | 2 +- ...gg_sort_on_measure_multi_terms_script.yaml | 12 + .../explain_agg_sort_on_measure_script.yaml | 12 + .../calcite/multi_terms_keyword.yaml | 2 +- .../unittest/SqlRequestFactoryTest.java | 1 - .../scan/context/AggPushDownAction.java | 29 +- 27 files changed, 452 insertions(+), 50 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml diff --git a/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java b/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java index 8f33d88344a..05cc158ce5d 100644 --- a/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java +++ b/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java @@ -185,7 +185,6 @@ public Grok compile(final String pattern, ZoneId defaultTimeZone, boolean namedO (group.get("subname") != null ? group.get("subname") : group.get("name"))); namedRegex = StringUtils.replace(namedRegex, "%{" + group.get("name") + "}", replacement, 1); - // System.out.println(_expanded_pattern); index++; } } diff --git a/common/src/test/java/org/opensearch/sql/common/grok/ApacheDataTypeTest.java b/common/src/test/java/org/opensearch/sql/common/grok/ApacheDataTypeTest.java index 7eb0e964dee..d73a28e9f50 100644 --- a/common/src/test/java/org/opensearch/sql/common/grok/ApacheDataTypeTest.java +++ b/common/src/test/java/org/opensearch/sql/common/grok/ApacheDataTypeTest.java @@ -49,7 +49,6 @@ public void test002_httpd_access_semi() throws GrokException { + " HTTP/%{NUMBER:httpversion;float})?|%{DATA:rawrequest})\" %{NUMBER:response;int}" + " (?:%{NUMBER:bytes;long}|-)"); - System.out.println(line); Match gm = grok.match(line); Map map = gm.capture(); diff --git a/common/src/test/java/org/opensearch/sql/common/grok/ApacheTest.java b/common/src/test/java/org/opensearch/sql/common/grok/ApacheTest.java index db420b16d35..5971be7fae0 100644 --- a/common/src/test/java/org/opensearch/sql/common/grok/ApacheTest.java +++ b/common/src/test/java/org/opensearch/sql/common/grok/ApacheTest.java @@ -38,7 +38,6 @@ public void test001_httpd_access() throws GrokException, IOException { BufferedReader br = new BufferedReader(new FileReader(LOG_FILE)); String line; - System.out.println("Starting test with httpd log"); while ((line = br.readLine()) != null) { Match gm = grok.match(line); final Map capture = gm.capture(); @@ -50,7 +49,6 @@ public void test001_httpd_access() throws GrokException, IOException { @Test public void test002_nasa_httpd_access() throws GrokException, IOException { Grok grok = compiler.compile("%{COMMONAPACHELOG}"); - System.out.println("Starting test with nasa log -- may take a while"); BufferedReader br; String line; File dir = new File(LOG_DIR_NASA); diff --git a/common/src/test/java/org/opensearch/sql/common/grok/GrokTest.java b/common/src/test/java/org/opensearch/sql/common/grok/GrokTest.java index 30938a91369..95eee461c59 100644 --- a/common/src/test/java/org/opensearch/sql/common/grok/GrokTest.java +++ b/common/src/test/java/org/opensearch/sql/common/grok/GrokTest.java @@ -317,7 +317,6 @@ public void test013_IpSet() throws Throwable { try (FileReader fr = new FileReader(Resources.getResource(ResourceManager.IP).getFile()); BufferedReader br = new BufferedReader(fr)) { String line; - System.out.println("Starting test 
with ip"); while ((line = br.readLine()) != null) { Match gm = grok.match(line); final Map map = gm.capture(); diff --git a/common/src/test/java/org/opensearch/sql/common/grok/MessagesTest.java b/common/src/test/java/org/opensearch/sql/common/grok/MessagesTest.java index 930da8caa8b..903183082b4 100644 --- a/common/src/test/java/org/opensearch/sql/common/grok/MessagesTest.java +++ b/common/src/test/java/org/opensearch/sql/common/grok/MessagesTest.java @@ -29,7 +29,6 @@ public void test001_linux_messages() throws GrokException, IOException { new BufferedReader( new FileReader(Resources.getResource(ResourceManager.MESSAGES).getFile())); String line; - System.out.println("Starting test with linux messages log -- may take a while"); while ((line = br.readLine()) != null) { Match gm = grok.match(line); Map map = gm.capture(); diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/BinUtilsTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/BinUtilsTest.java index 6c0e88d6f0c..b18e7ff26d3 100644 --- a/core/src/test/java/org/opensearch/sql/calcite/utils/BinUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/calcite/utils/BinUtilsTest.java @@ -106,7 +106,6 @@ public void testParseSpanStringWithExtendedTimeUnits() { public void testMonthUnitDetection() { // Specifically test that 1mon is detected as "mon" not "m" String result = SpanParser.extractTimeUnit("1mon"); - System.out.println("Result for '1mon': " + result); assertEquals("mon", result); } @@ -114,13 +113,6 @@ public void testMonthUnitDetection() { public void testFullMonthParsingChain() { // Test the full parsing chain for 1mon var spanInfo = SpanParser.parse("1mon"); - System.out.println( - "SpanInfo: type=" - + spanInfo.getType() - + ", value=" - + spanInfo.getValue() - + ", unit=" - + spanInfo.getUnit()); assertEquals(SpanType.TIME, spanInfo.getType()); assertEquals(1.0, spanInfo.getValue(), 0.001); assertEquals("mon", spanInfo.getUnit()); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 6da047e0c20..ffeabe8a79b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1149,6 +1149,17 @@ public void testExplainSortOnMeasure() throws IOException { TEST_INDEX_BANK))); } + @Test + public void testExplainSortOnMeasureWithScript() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_agg_sort_on_measure_script.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_state = lower(state) | " + + "stats bucket_nullable=false count() by new_state | sort `count()`")); + } + @Test public void testExplainSortOnMeasureMultiTerms() throws IOException { enabledOnlyWhenPushdownIsEnabled(); @@ -1160,6 +1171,18 @@ public void testExplainSortOnMeasureMultiTerms() throws IOException { + " gender, state | sort `count()`")); } + @Test + public void testExplainSortOnMeasureMultiTermsWithScript() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_agg_sort_on_measure_multi_terms_script.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_gender = lower(gender), new_state" + + " = lower(state) | stats bucket_nullable=false count() by 
new_gender, new_state |" + + " sort `count()`")); + } + @Test public void testExplainCompositeMultiBucketsAutoDateThenSortOnMeasureNotPushdown() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java index 9fb1a96046f..e1f1230eaa3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFillNullCommandIT.java @@ -137,7 +137,6 @@ public void testFillNullWithStringOnNumericAndStringMixedFields() { "source=%s | fields num0, str2 | fillnull value='test' num0 str2", TEST_INDEX_CALCS))); - System.out.println("Debugging error message: " + t); verifyErrorMessageContains( t, "fillnull failed: replacement value type VARCHAR is not compatible with field 'num0' " diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMVAppendFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMVAppendFunctionIT.java index cf84cbe7db6..9afb63056e8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMVAppendFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMVAppendFunctionIT.java @@ -97,8 +97,6 @@ public void testMvappendWithIntAndDouble() throws IOException { executeQuery( source(TEST_INDEX_BANK, "eval result = mvappend(1, 2.5) | head 1 | fields result")); - System.out.println(actual); - verifySchema(actual, schema("result", "array")); verifyDataRows(actual, rows(List.of(1, 2.5))); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java index 761fe9b965b..cefb46e4b53 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java @@ -428,7 +428,6 @@ public void testBrainAggregationModeWithGroupByClause_ShowNumberedToken() throws + " mode=aggregation show_numbered_token=true max_sample_count=5" + " variable_count_threshold=2 frequency_threshold_percentage=0.2", TEST_INDEX_HDFS_LOGS)); - System.out.println(result); verifySchema( result, schema("level", "string"), diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java index 44997e0538e..68bf57ea8dd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/JsonExtractAllFunctionIT.java @@ -9,7 +9,6 @@ import java.sql.SQLException; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexNode; @@ -84,14 +83,7 @@ private Map getMap(ResultSet resultSet, int columnIndex) throws assertNotNull(result); assertTrue(result instanceof Map); - @SuppressWarnings("unchecked") - Map map = (Map) result; - System.out.println( - "map: " - + map.entrySet().stream() - .map(entry -> entry.getKey() + "=" + entry.getValue()) - .collect(Collectors.joining(", "))); - return map; + return (Map) result; } @Test diff --git 
a/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java index 565c40b1212..5dea06b7634 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java @@ -146,7 +146,6 @@ public void validNumberOfPages() throws IOException { response = new JSONObject(executeFetchQuery(selectQuery, 28, JDBC)); cursor = response.getString(CURSOR); verifyIsV2Cursor(response); - System.out.println(response); pageCount = 1; while (!cursor.isEmpty()) { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TermQueryExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TermQueryExplainIT.java index ab2808ee3fb..31650d9d30c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TermQueryExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TermQueryExplainIT.java @@ -118,7 +118,6 @@ public void testNonCompatibleMappingsButTheFieldIsNotUsed() throws IOException { explainQuery( "SELECT dog_name FROM opensearch-sql_test_index_dog, opensearch-sql_test_index_dog2" + " WHERE dog_name = 'dog'"); - System.out.println(result); assertThat(result, containsString("dog_name")); assertThat(result, containsString("_source")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java index c2b3ec0b407..3e0b93bc7cd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java @@ -806,4 +806,372 @@ public void testStatsByDependentGroupFields() throws IOException { verifyDataRows( response, rows(61, 310, 41, 10, 31), rows(60, 390, 49, 10, 39), rows(59, 260, 36, 10, 26)); } + + @Test + public void testStatsSortOnMeasure() throws IOException { + try { + setQueryBucketSize(5); + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false count() by state | sort - `count()` |" + + " head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, rows(30, "TX"), rows(28, "MD"), rows(27, "ID"), rows(25, "ME"), rows(25, "AL")); + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false count() by state | sort `count()` | head" + + " 5", + TEST_INDEX_ACCOUNT)); + if (!isPushdownDisabled()) { + verifyDataRows( + response, + rows(13, "NV"), + rows(13, "SC"), + rows(14, "CO"), + rows(14, "AZ"), + rows(14, "DE")); + } else { + verifyDataRows( + response, + rows(13, "NV"), + rows(13, "SC"), + rows(14, "DE"), + rows(14, "AZ"), + rows(14, "NM")); + } + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false sum(balance) as sum by state | sort sum" + + " | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(266971, "NV"), + rows(279840, "SC"), + rows(303856, "WV"), + rows(339454, "OR"), + rows(346934, "IN")); + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false sum(balance) as sum by state | sort -" + + " sum | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(782199, "TX"), + rows(732523, "MD"), + rows(710408, "MA"), + rows(709135, "TN"), + rows(657957, "ID")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testStatsSpanSortOnMeasure() throws IOException { + try { + setQueryBucketSize(5); + JSONObject response = + executeQuery( + String.format( + "source=%s | 
stats bucket_nullable=false count() as cnt by span(birthdate," + + " 1month) | sort - cnt | head 5", + TEST_INDEX_BANK)); + verifyDataRows( + response, + rows(2, "2018-06-01 00:00:00"), + rows(2, "2018-08-01 00:00:00"), + rows(1, "2017-10-01 00:00:00"), + rows(1, "2017-11-01 00:00:00"), + rows(1, "2018-11-01 00:00:00")); + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false count() as cnt by span(birthdate," + + " 1month) | sort cnt | head 5", + TEST_INDEX_BANK)); + verifyDataRows( + response, + rows(1, "2018-11-01 00:00:00"), + rows(1, "2017-11-01 00:00:00"), + rows(1, "2017-10-01 00:00:00"), + rows(2, "2018-08-01 00:00:00"), + rows(2, "2018-06-01 00:00:00")); + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false sum(balance) by span(age, 2) | sort -" + + " `sum(balance)` | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(2800620, 30), + rows(2537475, 38), + rows(2500167, 32), + rows(2473878, 28), + rows(2464796, 34)); + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false sum(balance) by span(age, 2) | sort" + + " `sum(balance)` | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(1223243, 40), + rows(2205897, 26), + rows(2288020, 36), + rows(2350499, 24), + rows(2408482, 22)); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testStatsSortOnMeasureWithScript() throws IOException { + try { + setQueryBucketSize(5); + JSONObject response = + executeQuery( + String.format( + "source=%s | eval new_state = lower(state) | stats bucket_nullable=false count()" + + " by new_state | sort - `count()` | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, rows(30, "tx"), rows(28, "md"), rows(27, "id"), rows(25, "me"), rows(25, "al")); + response = + executeQuery( + String.format( + "source=%s | eval new_state = lower(state) | stats bucket_nullable=false count()" + + " by new_state | sort `count()` | head 5", + TEST_INDEX_ACCOUNT)); + if (!isPushdownDisabled()) { + verifyDataRows( + response, + rows(13, "nv"), + rows(13, "sc"), + rows(14, "co"), + rows(14, "az"), + rows(14, "de")); + } else { + verifyDataRows( + response, + rows(13, "nv"), + rows(13, "sc"), + rows(14, "de"), + rows(14, "az"), + rows(14, "nm")); + } + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testStatsSpanSortOnMeasureWithScript() throws IOException { + try { + setQueryBucketSize(5); + JSONObject response = + executeQuery( + String.format( + "source=%s | eval new_age = age + 2 | stats bucket_nullable=false sum(balance) by" + + " span(new_age, 2) | sort - `sum(balance)` | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(2800620, 32), + rows(2537475, 40), + rows(2500167, 34), + rows(2473878, 30), + rows(2464796, 36)); + response = + executeQuery( + String.format( + "source=%s | eval new_age = age + 2 | stats bucket_nullable=false sum(balance) by" + + " span(new_age, 2) | sort `sum(balance)` | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(1223243, 42), + rows(2205897, 28), + rows(2288020, 38), + rows(2350499, 26), + rows(2408482, 24)); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testStatsSpanSortOnMeasureMultiTerms() throws IOException { + try { + setQueryBucketSize(5); + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false count() by gender, state | sort -" + + " `count()` | head 5", + TEST_INDEX_ACCOUNT)); + 
verifyDataRows( + response, + rows(18, "M", "MD"), + rows(17, "M", "ID"), + rows(17, "F", "TX"), + rows(16, "M", "ME"), + rows(15, "M", "OK")); + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false count() by gender, state | sort" + + " `count()` | head 5", + TEST_INDEX_ACCOUNT)); + if (isCalciteEnabled()) { + if (!isPushdownDisabled()) { + verifyDataRows( + response, + rows(3, "F", "DE"), + rows(5, "F", "CT"), + rows(5, "F", "OR"), + rows(5, "F", "WI"), + rows(5, "M", "MI")); + } else { + verifyDataRows( + response, + rows(3, "F", "DE"), + rows(5, "F", "WI"), + rows(5, "F", "OR"), + rows(5, "M", "RI"), + rows(5, "F", "CT")); + } + } else { + verifyDataRows( + response, + rows(3, "F", "DE"), + rows(5, "M", "RI"), + rows(5, "M", "MI"), + rows(5, "F", "WI"), + rows(5, "M", "NE")); + } + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false sum(balance) as sum by gender, state |" + + " sort sum | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(85753, "F", "OR"), + rows(86793, "F", "DE"), + rows(100197, "F", "WI"), + rows(105693, "M", "NV"), + rows(124878, "M", "IN")); + response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false sum(balance) as sum by gender, state |" + + " sort - sum | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(505688, "F", "TX"), + rows(484567, "M", "MD"), + rows(432776, "M", "OK"), + rows(388568, "F", "AL"), + rows(382314, "F", "RI")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testStatsSpanSortOnMeasureMultiTermsWithScript() throws IOException { + try { + setQueryBucketSize(5); + JSONObject response = + executeQuery( + String.format( + "source=%s | eval new_gender = lower(gender), new_state = lower(state) | stats" + + " bucket_nullable=false count() by new_gender, new_state | sort - `count()`" + + " | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(18, "m", "md"), + rows(17, "m", "id"), + rows(17, "f", "tx"), + rows(16, "m", "me"), + rows(15, "m", "ok")); + response = + executeQuery( + String.format( + "source=%s | eval new_gender = lower(gender), new_state = lower(state) | stats" + + " bucket_nullable=false count() by new_gender, new_state | sort `count()` |" + + " head 5", + TEST_INDEX_ACCOUNT)); + if (isCalciteEnabled()) { + if (!isPushdownDisabled()) { + verifyDataRows( + response, + rows(3, "f", "de"), + rows(5, "f", "ct"), + rows(5, "f", "or"), + rows(5, "f", "wi"), + rows(5, "m", "mi")); + } else { + verifyDataRows( + response, + rows(3, "f", "de"), + rows(5, "m", "ri"), + rows(5, "f", "ct"), + rows(5, "m", "mi"), + rows(5, "m", "ne")); + } + } else { + verifyDataRows( + response, + rows(3, "f", "de"), + rows(5, "m", "ri"), + rows(5, "m", "mi"), + rows(5, "f", "wi"), + rows(5, "m", "ne")); + } + response = + executeQuery( + String.format( + "source=%s | eval new_gender = lower(gender), new_state = lower(state) | stats" + + " bucket_nullable=false sum(balance) as sum by new_gender, new_state | sort" + + " sum | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(85753, "f", "or"), + rows(86793, "f", "de"), + rows(100197, "f", "wi"), + rows(105693, "m", "nv"), + rows(124878, "m", "in")); + response = + executeQuery( + String.format( + "source=%s | eval new_gender = lower(gender), new_state = lower(state) | stats" + + " bucket_nullable=false sum(balance) as sum by new_gender, new_state | sort" + + " - sum | head 5", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + 
response, + rows(505688, "f", "tx"), + rows(484567, "m", "md"), + rows(432776, "m", "ok"), + rows(388568, "f", "al"), + rows(382314, "f", "ri")); + } finally { + resetQueryBucketSize(); + } + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml index 2193be94c08..9164e61b3e8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($31, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[MobilePhoneModel, MobilePhone, UserID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[u, MobilePhone, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["MobilePhoneModel","MobilePhone","UserID"],"excludes":[]},"aggregations":{"MobilePhone|MobilePhoneModel":{"multi_terms":{"terms":[{"field":"MobilePhone"},{"field":"MobilePhoneModel"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[MobilePhoneModel, MobilePhone, UserID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[u, MobilePhone, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["MobilePhoneModel","MobilePhone","UserID"],"excludes":[]},"aggregations":{"MobilePhone|MobilePhoneModel":{"multi_terms":{"terms":[{"field":"MobilePhone"},{"field":"MobilePhoneModel"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml index ae655f0e533..2b73585ff73 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, 
hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID"],"excludes":[]},"aggregations":{"SearchEngineID|SearchPhrase":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID"],"excludes":[]},"aggregations":{"SearchEngineID|SearchPhrase":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml index b74403958e0..6b3ab9ed1fe 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, UserID], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"UserID","boost":1.0}},{"exists":{"field":"SearchPhrase","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","UserID"],"excludes":[]},"aggregations":{"UserID|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, UserID], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"UserID","boost":1.0}},{"exists":{"field":"SearchPhrase","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","UserID"],"excludes":[]},"aggregations":{"UserID|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml index ccc50d9fd47..88a57952c89 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml @@ -9,7 +9,4 @@ calcite: LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], 
ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], m=[EXTRACT('minute':VARCHAR, $17)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), UserID, m, SearchPhrase]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"SearchPhrase":{"terms":{"field":"SearchPhrase","missing_bucket":false,"order":"asc"}}},{"UserID":{"terms":{"field":"UserID","missing_bucket":false,"order":"asc"}}},{"m":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiRXZlbnRUaW1lIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Ae57CiAgIm9wIjogewogICAgIm5hbWUiOiAiRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAibGl0ZXJhbCI6ICJtaW51dGUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACUV2ZW50VGltZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzdAAPTGphdmEvdXRpbC9NYXA7eHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAlUSU1FU1RBTVB+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgASdAAERGF0ZXNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGXhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAA
AHVxAH4AGwAAAABzcgATamF2YS51dGlsLkFycmF5TGlzdHiB0h2Zx2GdAwABSQAEc2l6ZXhwAAAAA3cEAAAAA3QAE3l5eXktTU0tZGQgSEg6bW06c3N0ABlzdHJpY3RfZGF0ZV9vcHRpb25hbF90aW1ldAAMZXBvY2hfbWlsbGlzeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[UserID, m, SearchPhrase, count()], SORT_AGG_METRICS->[3 DESC LAST], PROJECT->[count(), UserID, m, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID|m":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiRXZlbnRUaW1lIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Ae57CiAgIm9wIjogewogICAgIm5hbWUiOiAiRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAibGl0ZXJhbCI6ICJtaW51dGUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACUV2ZW50VGltZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzdAAPTGphdmEvdXRpbC9NYXA7eHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAlUSU1FU1RBTVB+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgASdAAERGF0ZXNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGXhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGwAAAABzcgATamF2YS51dGlsLkFycmF5TGlzdHiB0h2Zx2GdAwABSQAEc2l6ZXhwAAAAA3cEAAAAA3QAE3l5eXktTU0tZGQgSEg6bW06c3N0ABlzdHJpY3RfZGF0ZV9vcHRpb25hbF90aW1ldAAMZXBvY2hfbWlsbGlzeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"value_type":"long"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, 
startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml index 2565e5d6964..3c74a22c40f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml @@ -10,7 +10,7 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableLimit(offset=[1000], fetch=[10]) - EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), PROJECT->[PageViews, TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"TraficSourceID":{"terms":{"field":"TraficSourceID","missing_bucket":false,"order":"asc"}}},{"AdvEngineID":{"terms":{"field":"AdvEngineID","missing_bucket":false,"order":"asc"}}},{"SearchEngineID":{"terms":{"field":"SearchEngineID","missing_bucket":false,"order":"asc"}}},{"Src":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBT3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIlNlYXJjaEVuZ2luZUlEIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiU01BTExJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJBZHZFbmdpbmVJRCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIlJlZmVyZXIiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQE8XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogMCwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2U
KICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJpbnB1dCI6IDEsCiAgICAgICAgICAgICAgIm5hbWUiOiAiJDEiCiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAibGl0ZXJhbCI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAyLAogICAgICAibmFtZSI6ICIkMiIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIiIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAADdAAOU2VhcmNoRW5naW5lSUR+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAVTSE9SVHQAB1JlZmVyZXJ+cQB+AAp0AAZTVFJJTkd0AAtBZHZFbmdpbmVJRHEAfgAMeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":false,"order":"asc"}}},{"Dst":{"terms":{"field":"URL","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..5=[{inputs}], PageViews=[$t5], TraficSourceID=[$t0], SearchEngineID=[$t2], AdvEngineID=[$t1], Src=[$t3], Dst=[$t4]) + EnumerableLimit(fetch=[10000]) + EnumerableLimit(offset=[1000], fetch=[10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBT3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIlNlYXJjaEVuZ2luZUlEIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiU01BTExJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJBZHZFbmdpbmVJRCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIlJlZmVyZXIiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQE8XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogMCwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJpbnB1dCI6IDEsCiAgICAgICAgICAgICAgIm5hbWUiOiAiJDEiCiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAibGl0ZXJhbCI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAyLAogICAgICAibmFtZSI6ICIkMiIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIiIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAADdAAOU2VhcmNoRW5naW5lSUR+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAVTSE9SVHQAB1JlZmVyZXJ+cQB+AAp0AAZTVFJJTkd0AAtBZHZFbmdpbmVJRHEAfgAMeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml index 60737163de5..2670e3a77f4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[100], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]; NULL AS FALSE]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"bool":{"must":[{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"exists":{"field":"EventDate","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]; NULL AS FALSE]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"bool":{"must":[{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"exists":{"field":"EventDate","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml index 08a95aa3cba..a4078d5f8c7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, 
IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index 87afd801267..6bde95ecf0c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), gender, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), gender, state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml new file mode 100644 index 00000000000..b020c1003b1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC-nulls-first]) + LogicalProject(count()=[$2], new_gender=[$0], new_state=[$1]) + LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) + LogicalProject(new_gender=[$17], new_state=[$18]) + LogicalFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($18))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), new_gender, new_state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJnZW5kZXIiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQAo3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiAwLAogICAgICAibmFtZSI6ICIkMCIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAZnZW5kZXJzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AC3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABF0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAYeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAaAAAAAHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAMfnEAfgAQdAAGU1RSSU5HfnEAfgAUdAAHS2V5d29yZHEAfgAZeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydACjewogICJvcCI6IHsKICAgICJuYW1lIjogIkxPV0VSIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABXN0YXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml new file mode 100644 index 00000000000..52852044d92 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC-nulls-first]) + LogicalProject(count()=[$1], new_state=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(new_state=[$17]) + LogicalFilter(condition=[IS NOT NULL($17)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydACjewogICJvcCI6IHsKICAgICJuYW1lIjogIkxPV0VSIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABXN0YXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/multi_terms_keyword.yaml index 8ce21fba101..4b52e71862f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/multi_terms_keyword.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestFactoryTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestFactoryTest.java index 9911f265f19..e3bde2c17a2 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestFactoryTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestFactoryTest.java @@ -167,7 +167,6 @@ public void testGenerateSqlRequest_prearedStatementFromPayload2() { Assert.assertTrue( preparedStatementRequest.getParameters().get(2) instanceof PreparedStatementRequest.StringParameter); - System.out.println(preparedStatementRequest.getParameters().get(3)); Assert.assertTrue( preparedStatementRequest.getParameters().get(3).getValue() instanceof Boolean); Assert.assertTrue(preparedStatementRequest.getParameters().get(4).getValue() instanceof Long); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 5c9359bfab3..009dc0f57b7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -156,7 +156,7 @@ public void rePushDownSortAggMeasure( src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket())) { // multi-term agg MultiTermsAggregationBuilder multiTermsBuilder = - buildMultiTermsAggregationBuilder(composite); + buildMultiTermsAggregationBuilder(composite, bucketOrder); 
attachSubAggregations(composite.getSubAggregations(), path, multiTermsBuilder); aggregationBuilder = Pair.of( @@ -243,7 +243,12 @@ private TermsAggregationBuilder buildTermsAggregationBuilder( TermsValuesSourceBuilder terms, BucketOrder bucketOrder, int newSize) { TermsAggregationBuilder termsBuilder = new TermsAggregationBuilder(terms.name()); termsBuilder.size(newSize); - termsBuilder.field(terms.field()); + if (terms.field() != null) { + termsBuilder.field(terms.field()); + } + if (terms.script() != null) { + termsBuilder.script(terms.script()); + } if (terms.userValuetypeHint() != null) { termsBuilder.userValueTypeHint(terms.userValuetypeHint()); } @@ -258,7 +263,12 @@ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( DateHistogramValuesSourceBuilder dateHisto, BucketOrder bucketOrder) { DateHistogramAggregationBuilder dateHistoBuilder = new DateHistogramAggregationBuilder(dateHisto.name()); - dateHistoBuilder.field(dateHisto.field()); + if (dateHisto.field() != null) { + dateHistoBuilder.field(dateHisto.field()); + } + if (dateHisto.script() != null) { + dateHistoBuilder.script(dateHisto.script()); + } try { dateHistoBuilder.fixedInterval(dateHisto.getIntervalAsFixed()); } catch (IllegalArgumentException e) { @@ -275,7 +285,12 @@ private DateHistogramAggregationBuilder buildDateHistogramAggregationBuilder( private HistogramAggregationBuilder buildHistogramAggregationBuilder( HistogramValuesSourceBuilder histo, BucketOrder bucketOrder) { HistogramAggregationBuilder histoBuilder = new HistogramAggregationBuilder(histo.name()); - histoBuilder.field(histo.field()); + if (histo.field() != null) { + histoBuilder.field(histo.field()); + } + if (histo.script() != null) { + histoBuilder.script(histo.script()); + } histoBuilder.interval(histo.interval()); if (histo.userValuetypeHint() != null) { histoBuilder.userValueTypeHint(histo.userValuetypeHint()); @@ -286,7 +301,7 @@ private HistogramAggregationBuilder buildHistogramAggregationBuilder( /** Build a {@link MultiTermsAggregationBuilder} by {@link CompositeAggregationBuilder} */ private MultiTermsAggregationBuilder buildMultiTermsAggregationBuilder( - CompositeAggregationBuilder composite) { + CompositeAggregationBuilder composite, BucketOrder bucketOrder) { MultiTermsAggregationBuilder multiTermsBuilder = new MultiTermsAggregationBuilder(multiTermsBucketNameAsString(composite)); multiTermsBuilder.size(composite.size()); @@ -298,10 +313,14 @@ private MultiTermsAggregationBuilder buildMultiTermsAggregationBuilder( MultiTermsValuesSourceConfig.Builder config = new MultiTermsValuesSourceConfig.Builder(); config.setFieldName(termValue.field()); + if (termValue.script() != null) { + config.setScript(termValue.script()); + } config.setUserValueTypeHint(termValue.userValuetypeHint()); return config.build(); }) .toList()); + multiTermsBuilder.order(bucketOrder); return multiTermsBuilder; } From 663049dd8e2d823a7de89639aa23776706d30633 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 7 Nov 2025 16:37:53 +0800 Subject: [PATCH 35/99] Support `chart` command in PPL (#4579) * WIP: Make poc implementation for chart command Signed-off-by: Yuanchun Shen * Support param useother and otherstr Signed-off-by: Yuanchun Shen * Support usenull and nullstr (when both row split and col split present) Signed-off-by: Yuanchun Shen * Append a final aggregation to merge OTHER categories Signed-off-by: Yuanchun Shen * Handle common agg functions for OTHER category for timechart Signed-off-by: Yuanchun Shen # Conflicts: # 
core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java * Fix timechart IT Signed-off-by: Yuanchun Shen * Sort earliest results with asc order Signed-off-by: Yuanchun Shen * Support non-string fields as column split Signed-off-by: Yuanchun Shen * Fix min/earliest order & fix non-accumulative agg for chart Signed-off-by: Yuanchun Shen * Hint non-null in aggregateWithTrimming Signed-off-by: Yuanchun Shen * Add integration tests for chart command Signed-off-by: Yuanchun Shen * Add unit tests Signed-off-by: Yuanchun Shen * Add doc for chart command Signed-off-by: Yuanchun Shen * Prompt users that multiple agg is not supported Signed-off-by: Yuanchun Shen * Add explain ITs Signed-off-by: Yuanchun Shen * Remove unimplemented support for multiple aggregations in chart command Signed-off-by: Yuanchun Shen * Add unit tests for chart command Signed-off-by: Yuanchun Shen * Remove irrelevant yaml test Signed-off-by: Yuanchun Shen * Tweak chart.rst Signed-off-by: Yuanchun Shen * Swap the order of chart output to ensure metrics come last Signed-off-by: Yuanchun Shen * Filter rows without col split when calculating grand total Signed-off-by: Yuanchun Shen * Chores: tweak code order Signed-off-by: Yuanchun Shen * Add anonymize test to chart command Signed-off-by: Yuanchun Shen * Change grammar from limit=top 10 to limit=top10 Signed-off-by: Yuanchun Shen * Update chart doc Signed-off-by: Yuanchun Shen * Rename __row_number__ for chart to _row_number_chart_ Signed-off-by: Yuanchun Shen * Sort by row and col splits on top of chart results Signed-off-by: Yuanchun Shen * Ignore rows without a row split in chart command Signed-off-by: Yuanchun Shen * Keep categories with max summed values when top k is set Signed-off-by: Yuanchun Shen * Simplify toAddHintsOnAggregate condition Signed-off-by: Yuanchun Shen * Chores: eliminate unnecessary variables Signed-off-by: Yuanchun Shen * Apply a non-null filter on fields referred by aggregations Signed-off-by: Yuanchun Shen * Fix chart plans Signed-off-by: Yuanchun Shen * Get rid of record class Signed-off-by: Yuanchun Shen * Move ranking by column split to a helper function Signed-off-by: Yuanchun Shen --------- Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/analysis/Analyzer.java | 6 + .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/tree/Chart.java | 55 +++ .../sql/calcite/CalciteRelNodeVisitor.java | 399 ++++++++++++++--- .../sql/calcite/utils/PlanUtils.java | 1 + docs/category.json | 1 + docs/user/ppl/cmd/chart.rst | 213 +++++++++ docs/user/ppl/index.rst | 2 + .../calcite/remote/CalciteChartCommandIT.java | 331 ++++++++++++++ .../sql/calcite/remote/CalciteExplainIT.java | 67 +++ .../remote/CalciteTimechartCommandIT.java | 30 +- .../calcite/chart_multiple_group_keys.yaml | 36 ++ .../calcite/chart_null_str.yaml | 40 ++ .../calcite/chart_single_group_key.yaml | 10 + .../chart_timestamp_span_and_category.yaml | 32 ++ .../calcite/chart_use_other.yaml | 31 ++ .../calcite/chart_with_integer_span.yaml | 11 + .../calcite/chart_with_limit.yaml | 10 + .../calcite/chart_with_timestamp_span.yaml | 11 + .../explain_agg_script_udt_arg_push.yaml | 2 +- .../calcite/explain_timechart.yaml | 38 +- .../chart_multiple_group_keys.yaml | 40 ++ .../chart_multiple_groups.yaml | 37 ++ .../calcite_no_pushdown/chart_null_str.yaml | 40 ++ .../chart_single_group.yaml | 15 + .../chart_single_group_key.yaml | 15 + .../chart_timestamp_span_and_category.yaml | 38 ++
.../calcite_no_pushdown/chart_use_other.yaml | 37 ++ .../chart_with_integer_span.yaml | 15 + .../calcite_no_pushdown/chart_with_limit.yaml | 15 + .../chart_with_timestamp_span.yaml | 15 + .../explain_timechart.yaml | 38 +- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 6 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 23 + .../opensearch/sql/ppl/parser/AstBuilder.java | 104 +++-- .../sql/ppl/parser/AstExpressionBuilder.java | 54 +++ .../sql/ppl/utils/ArgumentFactory.java | 33 ++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 37 ++ .../sql/ppl/calcite/CalcitePPLChartTest.java | 412 ++++++++++++++++++ .../ppl/calcite/CalcitePPLTimechartTest.java | 8 +- .../sql/ppl/parser/AstBuilderTest.java | 76 ++++ .../ppl/parser/AstExpressionBuilderTest.java | 63 +++ .../sql/ppl/utils/ArgumentFactoryTest.java | 35 ++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 28 ++ 44 files changed, 2335 insertions(+), 180 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Chart.java create mode 100644 docs/user/ppl/cmd/chart.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_multiple_group_keys.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_use_other.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_group_keys.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_groups.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_null_str.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group_key.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_timestamp_span_and_category.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_use_other.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_integer_span.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_limit.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_timestamp_span.yaml create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index e79c15e5881..f7017565595 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -61,6 +61,7 @@ import org.opensearch.sql.ast.tree.Append; import 
org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -769,6 +770,11 @@ public LogicalPlan visitSpath(SPath node, AnalysisContext context) { throw getOnlyForCalciteException("Spath"); } + @Override + public LogicalPlan visitChart(Chart node, AnalysisContext context) { + throw getOnlyForCalciteException("Chart"); + } + @Override public LogicalPlan visitTimechart(Timechart node, AnalysisContext context) { throw getOnlyForCalciteException("Timechart"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 0dd475c5612..320723fd57c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -49,6 +49,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -275,6 +276,10 @@ public T visitReverse(Reverse node, C context) { return visitChildren(node, context); } + public T visitChart(Chart node, C context) { + return visitChildren(node, context); + } + public T visitTimechart(Timechart node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java new file mode 100644 index 00000000000..d0f982edce6 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +/** AST node representing the chart command. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@AllArgsConstructor +@lombok.Builder(toBuilder = true) +public class Chart extends UnresolvedPlan { + public static final Literal DEFAULT_USE_OTHER = Literal.TRUE; + public static final Literal DEFAULT_OTHER_STR = AstDSL.stringLiteral("OTHER"); + public static final Literal DEFAULT_LIMIT = AstDSL.intLiteral(10); + public static final Literal DEFAULT_USE_NULL = Literal.TRUE; + public static final Literal DEFAULT_NULL_STR = AstDSL.stringLiteral("NULL"); + public static final Literal DEFAULT_TOP = Literal.TRUE; + + private UnresolvedPlan child; + private UnresolvedExpression rowSplit; + private UnresolvedExpression columnSplit; + private UnresolvedExpression aggregationFunction; + private List<Argument> arguments; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List<UnresolvedPlan> getChild() { + return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitChart(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 09ad5d4009a..94086952610 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -30,6 +30,7 @@ import com.google.common.collect.Streams; import java.util.ArrayList; import java.util.Arrays; +import java.util.BitSet; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; @@ -39,7 +40,9 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; +import lombok.AllArgsConstructor; import org.apache.calcite.adapter.enumerable.RexToLixTranslator; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.ViewExpanders; @@ -66,6 +69,7 @@ import org.apache.calcite.sql.type.MapSqlType; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilder.AggCall; import org.apache.calcite.util.Holder; @@ -102,6 +106,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; @@ -921,7 +926,8 @@ private boolean isCountField(RexCall call) { private Pair, List> aggregateWithTrimming( List groupExprList, List aggExprList, - CalcitePlanContext context) { + CalcitePlanContext context, + boolean hintBucketNonNull) { Pair, List> resolved = resolveAttributesForAggregation(groupExprList, aggExprList, context); List resolvedGroupByList = resolved.getLeft(); @@ -1025,6 +1031,7 @@ private Pair, List> aggregateWithTrimming( List intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft()); context.relBuilder.aggregate( context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight()); + if (hintBucketNonNull) addIgnoreNullBucketHintToAggregate(context); // During aggregation, Calcite projects both input dependencies and output group-by fields. // When names conflict, Calcite adds numeric suffixes (e.g., "value0"). // Apply explicit renaming to restore the intended aliases. @@ -1091,8 +1098,39 @@ private Pair, List> resolveAttributesForAggregation( return Pair.of(groupByList, aggCallList); } + /** Visits an aggregation for stats command */ @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { + Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); + Boolean bucketNullable = + (Boolean) statsArgs.getOrDefault(Argument.BUCKET_NULLABLE, Literal.TRUE).getValue(); + int nGroup = node.getGroupExprList().size() + (Objects.nonNull(node.getSpan()) ? 
1 : 0); + BitSet nonNullGroupMask = new BitSet(nGroup); + if (!bucketNullable) { + nonNullGroupMask.set(0, nGroup); + } + visitAggregation(node, context, nonNullGroupMask, true, false); + return context.relBuilder.peek(); + } + + /** + * Visits an aggregation node and builds the corresponding Calcite RelNode. + * + * @param node the aggregation node containing group expressions and aggregation functions + * @param context the Calcite plan context for building RelNodes + * @param nonNullGroupMask bit set indicating group by fields that need to be non-null + * @param metricsFirst if true, aggregation results (metrics) appear first in output schema + * (metrics, group-by fields); if false, group expressions appear first (group-by fields, + * metrics). + * @param includeAggFieldsInNullFilter if true, also applies non-null filters to aggregation input + * fields in addition to group-by fields + */ + private void visitAggregation( + Aggregation node, + CalcitePlanContext context, + BitSet nonNullGroupMask, + boolean metricsFirst, + boolean includeAggFieldsInNullFilter) { visitChildren(node, context); List aggExprList = node.getAggExprList(); @@ -1102,46 +1140,43 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { UnresolvedExpression span = node.getSpan(); if (Objects.nonNull(span)) { groupExprList.add(span); - List timeSpanFilters = - getTimeSpanField(span).stream() - .map(f -> rexVisitor.analyze(f, context)) - .map(context.relBuilder::isNotNull) - .toList(); - if (!timeSpanFilters.isEmpty()) { - // add isNotNull filter before aggregation for time span - context.relBuilder.filter(timeSpanFilters); + if (getTimeSpanField(span).isPresent()) { + nonNullGroupMask.set(0); } } groupExprList.addAll(node.getGroupExprList()); - // add stats hint to LogicalAggregation - Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); - Boolean bucketNullable = - (Boolean) statsArgs.getOrDefault(Argument.BUCKET_NULLABLE, Literal.TRUE).getValue(); - boolean toAddHintsOnAggregate = false; - if (!bucketNullable - && !groupExprList.isEmpty() - && !(groupExprList.size() == 1 && getTimeSpanField(span).isPresent())) { - toAddHintsOnAggregate = true; - // add isNotNull filter before aggregation for non-nullable buckets - List groupByList = - groupExprList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); - context.relBuilder.filter( - PlanUtils.getSelectColumns(groupByList).stream() - .map(context.relBuilder::field) - .map(context.relBuilder::isNotNull) - .toList()); - } + // Add stats hint to LogicalAggregation. 
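+ // The non-null hint is attached only when the mask marks every group-by position, i.e. when the isNotNull filter below guarantees that no null buckets remain.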
+ boolean toAddHintsOnAggregate = + !groupExprList.isEmpty() + // This checks if all group-bys should be nonnull + && nonNullGroupMask.nextClearBit(0) >= groupExprList.size(); + // Add isNotNull filter before aggregation for non-nullable buckets + List nonNullCandidates = + groupExprList.stream() + .map(expr -> rexVisitor.analyze(expr, context)) + .collect(Collectors.toCollection(ArrayList::new)); + if (includeAggFieldsInNullFilter) { + nonNullCandidates.addAll( + PlanUtils.getInputRefsFromAggCall( + aggExprList.stream().map(expr -> aggVisitor.analyze(expr, context)).toList())); + nonNullGroupMask.set(groupExprList.size(), nonNullCandidates.size()); + } + List nonNullFields = + IntStream.range(0, nonNullCandidates.size()) + .filter(nonNullGroupMask::get) + .mapToObj(nonNullCandidates::get) + .toList(); + context.relBuilder.filter( + PlanUtils.getSelectColumns(nonNullFields).stream() + .map(context.relBuilder::field) + .map(context.relBuilder::isNotNull) + .toList()); Pair, List> aggregationAttributes = - aggregateWithTrimming(groupExprList, aggExprList, context); - if (toAddHintsOnAggregate) { - addIgnoreNullBucketHintToAggregate(context); - } + aggregateWithTrimming(groupExprList, aggExprList, context, toAddHintsOnAggregate); // schema reordering - // As an example, in command `stats count() by colA, colB`, - // the sequence of output schema is "count, colA, colB". List outputFields = context.relBuilder.fields(); int numOfOutputFields = outputFields.size(); int numOfAggList = aggExprList.size(); @@ -1149,8 +1184,6 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { // Add aggregation results first List aggRexList = outputFields.subList(numOfOutputFields - numOfAggList, numOfOutputFields); - reordered.addAll(aggRexList); - // Add group by columns List aliasedGroupByList = aggregationAttributes.getLeft().stream() .map(this::extractAliasLiteral) @@ -1159,10 +1192,17 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { .map(context.relBuilder::field) .map(f -> (RexNode) f) .toList(); - reordered.addAll(aliasedGroupByList); + if (metricsFirst) { + // As an example, in command `stats count() by colA, colB`, + // the sequence of output schema is "count, colA, colB". + reordered.addAll(aggRexList); + // Add group by columns + reordered.addAll(aliasedGroupByList); + } else { + reordered.addAll(aliasedGroupByList); + reordered.addAll(aggRexList); + } context.relBuilder.project(reordered); - - return context.relBuilder.peek(); } private Optional getTimeSpanField(UnresolvedExpression expr) { @@ -2210,11 +2250,7 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { .map(context.relBuilder::isNotNull) .toList()); } - aggregateWithTrimming(groupExprList, aggExprList, context); - - if (toAddHintsOnAggregate) { - addIgnoreNullBucketHintToAggregate(context); - } + aggregateWithTrimming(groupExprList, aggExprList, context, toAddHintsOnAggregate); // 2. 
add count() column with sort direction List<RexNode> partitionKeys = rexVisitor.analyze(node.getGroupExprList(), context); @@ -2338,7 +2374,7 @@ public RelNode visitFlatten(Flatten node, CalcitePlanContext context) { } /** Helper method to get the function name for proper column naming */ - private String getValueFunctionName(UnresolvedExpression aggregateFunction) { + private String getAggFieldAlias(UnresolvedExpression aggregateFunction) { if (aggregateFunction instanceof Alias) { return ((Alias) aggregateFunction).getName(); } @@ -2375,6 +2411,168 @@ private String getValueFunctionName(UnresolvedExpression aggregateFunction) { return sb.toString(); } + @Override + public RelNode visitChart(Chart node, CalcitePlanContext context) { + visitChildren(node, context); + ArgumentMap argMap = ArgumentMap.of(node.getArguments()); + ChartConfig config = ChartConfig.fromArguments(argMap); + List<UnresolvedExpression> groupExprList = + Stream.of(node.getRowSplit(), node.getColumnSplit()).filter(Objects::nonNull).toList(); + Aggregation aggregation = + new Aggregation( + List.of(node.getAggregationFunction()), List.of(), groupExprList, null, List.of()); + BitSet nonNullGroupMask = new BitSet(groupExprList.size()); + // Rows without a row-split are always ignored + if (config.useNull) { + nonNullGroupMask.set(0); + } else { + nonNullGroupMask.set(0, groupExprList.size()); + } + visitAggregation(aggregation, context, nonNullGroupMask, false, true); + RelBuilder relBuilder = context.relBuilder; + + // If a second split is not present or limit equals 0, we go no further with the limit, nullstr, + // and otherstr parameters because all truncating & renaming is performed on the column split + if (node.getRowSplit() == null + || node.getColumnSplit() == null + || Objects.equals(config.limit, 0)) { + // The output of chart is expected to be ordered by row split names + relBuilder.sort(relBuilder.field(0)); + return relBuilder.peek(); + } + + // Convert the column split to string if necessary: column split was supposed to be pivoted to
+ // column names. This guarantees that its type is compatible with useother and usenull + RexNode colSplit = relBuilder.field(1); + String columnSplitName = relBuilder.peek().getRowType().getFieldNames().get(1); + if (!SqlTypeUtil.isCharacter(colSplit.getType())) { + colSplit = + relBuilder.alias( + context.rexBuilder.makeCast( + UserDefinedFunctionUtils.NULLABLE_STRING, colSplit, true, true), + columnSplitName); + } + relBuilder.project(relBuilder.field(0), colSplit, relBuilder.field(2)); + RelNode aggregated = relBuilder.peek(); + // 1: column-split, 2: agg + RelNode ranked = rankByColumnSplit(context, 1, 2, config.top); + + relBuilder.push(aggregated); + relBuilder.push(ranked); + + // on column-split = group key + relBuilder.join( + JoinRelType.LEFT, relBuilder.equals(relBuilder.field(2, 0, 1), relBuilder.field(2, 1, 0))); + + RexNode colSplitPostJoin = relBuilder.field(1); + RexNode lteCondition = + relBuilder.call( + SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + relBuilder.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_CHART), + relBuilder.literal(config.limit)); + if (!config.useOther) { + relBuilder.filter(lteCondition); + } + RexNode nullCondition = relBuilder.isNull(colSplitPostJoin); + + RexNode columnSplitExpr; + if (config.useNull) { + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + nullCondition, + relBuilder.literal(config.nullStr), + lteCondition, + relBuilder.field(1), // col split + relBuilder.literal(config.otherStr)); + } else { + columnSplitExpr = + relBuilder.call( + SqlStdOperatorTable.CASE, + lteCondition, + relBuilder.field(1), + relBuilder.literal(config.otherStr)); + } + + String aggFieldName = relBuilder.peek().getRowType().getFieldNames().get(2); + relBuilder.project( + relBuilder.field(0), + relBuilder.alias(columnSplitExpr, columnSplitName), + relBuilder.field(2)); + String aggFunctionName = getAggFunctionName(node.getAggregationFunction()); + BuiltinFunctionName aggFunction = + BuiltinFunctionName.of(aggFunctionName) + .orElseThrow( + () -> + new IllegalArgumentException( + StringUtils.format( + "Unrecognized aggregation function: %s", aggFunctionName))); + relBuilder.aggregate( + relBuilder.groupKey(relBuilder.field(0), relBuilder.field(1)), + buildAggCall(context.relBuilder, aggFunction, relBuilder.field(2)).as(aggFieldName)); + // The output of chart is expected to be ordered by row and column split names + relBuilder.sort(relBuilder.field(0), relBuilder.field(1)); + return relBuilder.peek(); + } + + /** + * Aggregate by column split then rank by grand total (summed value of each category). The output
+ * is [col-split, grand-total, row-number] + */ + private RelNode rankByColumnSplit( + CalcitePlanContext context, int columnSplitOrdinal, int aggOrdinal, boolean top) { + RelBuilder relBuilder = context.relBuilder; + + relBuilder.project(relBuilder.field(columnSplitOrdinal), relBuilder.field(aggOrdinal)); + // Make sure that rows without a column split don't interfere with grand total calculation + relBuilder.filter(relBuilder.isNotNull(relBuilder.field(0))); + final String GRAND_TOTAL_COL = "__grand_total__"; + relBuilder.aggregate( + relBuilder.groupKey(relBuilder.field(0)), + // Top-K semantics: retain categories whose summed values are among the greatest + relBuilder.sum(relBuilder.field(1)).as(GRAND_TOTAL_COL)); // results: group key, agg calls + RexNode grandTotal = relBuilder.field(GRAND_TOTAL_COL); + // Apply sorting: keep the max values if top is set + if (top) { + grandTotal = relBuilder.desc(grandTotal); + } + // Always sort nulls last so that they don't interfere with top / bottom calculation + grandTotal = relBuilder.nullsLast(grandTotal); + RexNode rowNum = + PlanUtils.makeOver( + context, + BuiltinFunctionName.ROW_NUMBER, + relBuilder.literal(1), // dummy expression for row number calculation + List.of(), + List.of(), + List.of(grandTotal), + WindowFrame.toCurrentRow()); + relBuilder.projectPlus(relBuilder.alias(rowNum, PlanUtils.ROW_NUMBER_COLUMN_FOR_CHART)); + return relBuilder.build(); + } + + @AllArgsConstructor + private static class ChartConfig { + private final int limit; + private final boolean top; + private final boolean useOther; + private final boolean useNull; + private final String otherStr; + private final String nullStr; + + static ChartConfig fromArguments(ArgumentMap argMap) { + int limit = (Integer) argMap.getOrDefault("limit", Chart.DEFAULT_LIMIT).getValue(); + boolean top = (Boolean) argMap.getOrDefault("top", Chart.DEFAULT_TOP).getValue(); + boolean useOther = + (Boolean) argMap.getOrDefault("useother", Chart.DEFAULT_USE_OTHER).getValue(); + boolean useNull = (Boolean) argMap.getOrDefault("usenull", Chart.DEFAULT_USE_NULL).getValue(); + String otherStr = + (String) argMap.getOrDefault("otherstr", Chart.DEFAULT_OTHER_STR).getValue(); + String nullStr = (String) argMap.getOrDefault("nullstr", Chart.DEFAULT_NULL_STR).getValue(); + return new ChartConfig(limit, top, useOther, useNull, otherStr, nullStr); + } + } + + /** Transforms timechart command into SQL-based operations. 
+   */
   @Override
   public RelNode visitTimechart(
@@ -2384,11 +2582,11 @@ public RelNode visitTimechart(
     // Extract parameters
     UnresolvedExpression spanExpr = node.getBinExpression();
-    List<UnresolvedExpression> groupExprList = Arrays.asList(spanExpr);
+    List<UnresolvedExpression> groupExprList;
 
     // Handle no by field case
     if (node.getByField() == null) {
-      String valueFunctionName = getValueFunctionName(node.getAggregateFunction());
+      String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction());
 
       // Create group expression list with just the timestamp span but use a different alias
       // to avoid @timestamp naming conflict
@@ -2396,7 +2594,7 @@
       simpleGroupExprList.add(new Alias("timestamp", spanExpr));
       // Create agg expression list with the aggregate function
       List<UnresolvedExpression> simpleAggExprList =
-          List.of(new Alias(valueFunctionName, node.getAggregateFunction()));
+          List.of(new Alias(aggFieldAlias, node.getAggregateFunction()));
       // Create an Aggregation object
       Aggregation aggregation =
           new Aggregation(
@@ -2411,9 +2609,9 @@
       context.relBuilder.push(result);
       // Reorder fields: timestamp first, then count
       context.relBuilder.project(
-          context.relBuilder.field("timestamp"), context.relBuilder.field(valueFunctionName));
+          context.relBuilder.field("timestamp"), context.relBuilder.field(aggFieldAlias));
       // Rename timestamp to @timestamp
-      context.relBuilder.rename(List.of("@timestamp", valueFunctionName));
+      context.relBuilder.rename(List.of("@timestamp", aggFieldAlias));
       context.relBuilder.sort(context.relBuilder.field(0));
       return context.relBuilder.peek();
@@ -2422,7 +2620,7 @@
     // Extract parameters for byField case
     UnresolvedExpression byField = node.getByField();
     String byFieldName = ((Field) byField).getField().toString();
-    String valueFunctionName = getValueFunctionName(node.getAggregateFunction());
+    String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction());
     int limit = Optional.ofNullable(node.getLimit()).orElse(10);
     boolean useOther = Optional.ofNullable(node.getUseOther()).orElse(true);
@@ -2430,7 +2628,7 @@
     try {
       // Step 1: Initial aggregation - IMPORTANT: order is [spanExpr, byField]
       groupExprList = Arrays.asList(spanExpr, byField);
-      aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context);
+      aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context, false);
 
       // First rename the timestamp field (2nd to last) to @timestamp
       List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
@@ -2449,11 +2647,11 @@
 
       // Handle no limit case - just sort and return with proper field aliases
       if (limit == 0) {
-        // Add final projection with proper aliases: [@timestamp, byField, valueFunctionName]
+        // Add final projection with proper aliases: [@timestamp, byField, aggFieldAlias]
         context.relBuilder.project(
            context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"),
            context.relBuilder.alias(context.relBuilder.field(1), byFieldName),
-           context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName));
+           context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias));
         context.relBuilder.sort(context.relBuilder.field(0), context.relBuilder.field(1));
         return context.relBuilder.peek();
       }
@@ -2463,36 +2661,67 @@
 
       // Step 2: Find top N categories using window function approach (more efficient than separate
       // aggregation)
-      RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, context);
+      String aggFunctionName = getAggFunctionName(node.getAggregateFunction());
+      Optional<BuiltinFunctionName> aggFuncNameOptional = BuiltinFunctionName.of(aggFunctionName);
+      if (aggFuncNameOptional.isEmpty()) {
+        throw new IllegalArgumentException(
+            StringUtils.format("Unrecognized aggregation function: %s", aggFunctionName));
+      }
+      BuiltinFunctionName aggFunction = aggFuncNameOptional.get();
+      RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, aggFunction, context);
 
       // Step 3: Apply OTHER logic with single pass
       return buildFinalResultWithOther(
-          completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context);
+          completeResults,
+          topCategories,
+          byFieldName,
+          aggFunction,
+          aggFieldAlias,
+          useOther,
+          limit,
+          context);
     } catch (Exception e) {
       throw new RuntimeException("Error in visitTimechart: " + e.getMessage(), e);
     }
   }
 
+  private String getAggFunctionName(UnresolvedExpression aggregateFunction) {
+    if (aggregateFunction instanceof Alias alias) {
+      return getAggFunctionName(alias.getDelegated());
+    }
+    return ((AggregateFunction) aggregateFunction).getFuncName();
+  }
+
   /** Build top categories query - simpler approach that works better with OTHER handling */
   private RelNode buildTopCategoriesQuery(
-      RelNode completeResults, int limit, CalcitePlanContext context) {
+      RelNode completeResults,
+      int limit,
+      BuiltinFunctionName aggFunction,
+      CalcitePlanContext context) {
     context.relBuilder.push(completeResults);
 
     // Filter out null values when determining top categories - null should not count towards limit
     context.relBuilder.filter(context.relBuilder.isNotNull(context.relBuilder.field(1)));
 
     // Get totals for non-null categories - field positions: 0=@timestamp, 1=byField, 2=value
+    RexInputRef valueField = context.relBuilder.field(2);
+    AggCall call = buildAggCall(context.relBuilder, aggFunction, valueField);
+
     context.relBuilder.aggregate(
-        context.relBuilder.groupKey(context.relBuilder.field(1)),
-        context.relBuilder.sum(context.relBuilder.field(2)).as("grand_total"));
+        context.relBuilder.groupKey(context.relBuilder.field(1)), call.as("grand_total"));
 
     // Apply sorting and limit to non-null categories only
-    context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field("grand_total")));
+    RexNode sortField = context.relBuilder.field("grand_total");
+    // For MIN and EARLIEST, top results should be the minimum ones
+    sortField =
+        aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST
            ?
sortField + : context.relBuilder.desc(sortField); + context.relBuilder.sort(sortField); if (limit > 0) { context.relBuilder.limit(0, limit); } - return context.relBuilder.build(); } @@ -2501,18 +2730,25 @@ private RelNode buildFinalResultWithOther( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + BuiltinFunctionName aggFunction, + String aggFieldAlias, boolean useOther, int limit, CalcitePlanContext context) { // Use zero-filling for count aggregations, standard result for others - if (valueFunctionName.equals("count")) { + if (aggFieldAlias.equals("count")) { return buildZeroFilledResult( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); + completeResults, topCategories, byFieldName, aggFieldAlias, useOther, limit, context); } else { return buildStandardResult( - completeResults, topCategories, byFieldName, valueFunctionName, useOther, context); + completeResults, + topCategories, + byFieldName, + aggFunction, + aggFieldAlias, + useOther, + context); } } @@ -2521,7 +2757,8 @@ private RelNode buildStandardResult( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + BuiltinFunctionName aggFunctionName, + String aggFieldAlias, boolean useOther, CalcitePlanContext context) { @@ -2544,11 +2781,13 @@ private RelNode buildStandardResult( context.relBuilder.project( context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), context.relBuilder.alias(categoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); + RexInputRef valueField = context.relBuilder.field(2); + AggCall aggCall = buildAggCall(context.relBuilder, aggFunctionName, valueField); context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); + aggCall.as(aggFieldAlias)); applyFiltersAndSort(useOther, context); return context.relBuilder.peek(); @@ -2583,7 +2822,7 @@ private RelNode buildZeroFilledResult( RelNode completeResults, RelNode topCategories, String byFieldName, - String valueFunctionName, + String aggFieldAlias, boolean useOther, int limit, CalcitePlanContext context) { @@ -2622,7 +2861,7 @@ private RelNode buildZeroFilledResult( context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), "@timestamp"), context.relBuilder.alias(context.relBuilder.field(1), byFieldName), - context.relBuilder.alias(context.relBuilder.literal(0), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.literal(0), aggFieldAlias)); RelNode zeroFilledCombinations = context.relBuilder.build(); // Get actual results with OTHER logic applied @@ -2644,7 +2883,7 @@ private RelNode buildZeroFilledResult( context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), "@timestamp"), context.relBuilder.alias(actualCategoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); + context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), @@ -2659,12 +2898,30 @@ private RelNode buildZeroFilledResult( // Aggregate to combine actual and zero-filled data context.relBuilder.aggregate( context.relBuilder.groupKey(context.relBuilder.field(0), 
context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); + context.relBuilder.sum(context.relBuilder.field(2)).as(aggFieldAlias)); applyFiltersAndSort(useOther, context); return context.relBuilder.peek(); } + /** + * Aggregate a field based on a given built-in aggregation function name. + * + *

It is intended for secondary aggregations in timechart and chart commands. Using it
+   * elsewhere may lead to unintended results. It handles MIN, MAX, AVG, EARLIEST, and LATEST
+   * explicitly; for the remaining aggregation types (such as COUNT and DISTINCT_COUNT) it sums
+   * the results, assuming them to be accumulative.
+   */
+  private AggCall buildAggCall(
+      RelBuilder relBuilder, BuiltinFunctionName aggFunction, RexNode node) {
+    return switch (aggFunction) {
+      case MIN, EARLIEST -> relBuilder.min(node);
+      case MAX, LATEST -> relBuilder.max(node);
+      case AVG -> relBuilder.avg(node);
+      default -> relBuilder.sum(node);
+    };
+  }
+
   @Override
   public RelNode visitTrendline(Trendline node, CalcitePlanContext context) {
     visitChildren(node, context);
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
index fefab6d57ce..ded7ba541a4 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
@@ -66,6 +66,7 @@ public interface PlanUtils {
   String ROW_NUMBER_COLUMN_FOR_MAIN = "_row_number_main_";
   String ROW_NUMBER_COLUMN_FOR_SUBSEARCH = "_row_number_subsearch_";
   String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__";
+  String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_";
 
   static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) {
     return switch (unit) {
diff --git a/docs/category.json b/docs/category.json
index 7ebe643373b..f126904da6a 100644
--- a/docs/category.json
+++ b/docs/category.json
@@ -50,6 +50,7 @@
     "user/ppl/cmd/streamstats.rst",
     "user/ppl/cmd/subquery.rst",
     "user/ppl/cmd/syntax.rst",
+    "user/ppl/cmd/chart.rst",
     "user/ppl/cmd/timechart.rst",
     "user/ppl/cmd/search.rst",
     "user/ppl/functions/statistical.rst",
diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst
new file mode 100644
index 00000000000..fe8aac4a3e0
--- /dev/null
+++ b/docs/user/ppl/cmd/chart.rst
@@ -0,0 +1,213 @@
+=====
+chart
+=====
+
+.. rubric:: Table of contents
+
+.. contents::
+   :local:
+   :depth: 2
+
+
+Description
+===========
+
+The ``chart`` command transforms search results by applying a statistical aggregation function and optionally grouping the data by one or two fields. The results are suitable for visualization as a two-dimensional chart when grouping by two fields, where unique values in the second group key can be pivoted to column names.
+
+Version
+=======
+3.4.0
+
+Syntax
+======
+
+.. code-block:: text
+
+    chart
+    [limit=(top|bottom)<int>] [useother=<bool>] [usenull=<bool>] [nullstr=<string>] [otherstr=<string>]
+    <aggregation_function>
+    [ by <field1> [<field2>] ] | [over <field1>] [ by <field2> ]
+
+**Parameters:**
+
+* **limit**: optional. Specifies the number of categories to display when using column split. Each unique value in the column split field represents a category.
+
+  * Default: top10
+  * Syntax: ``limit=(top|bottom)<int>`` or ``limit=<int>`` (defaults to top)
+  * When ``limit=K`` is set, the top or bottom K categories from the column split field are retained; the remaining categories are grouped into an "OTHER" category if ``useother`` is not set to false.
+  * Set limit to 0 to show all categories without any limit.
+  * Use ``limit=topK`` or ``limit=bottomK`` to specify whether to retain the top or bottom K column categories. The ranking is based on the sum of aggregated values for each column category. For example, ``chart limit=top3 count() by region, product`` keeps the 3 products with the highest total counts across all regions. If not specified, top is used by default.
+  * Only applies when a column split is present (i.e. when grouping by two fields, or when the over...by... syntax is used).
+
+* **useother**: optional. Controls whether to create an "OTHER" category for categories beyond the limit.
+
+  * Default: true
+  * When set to false, only the top/bottom N categories (based on limit) are shown without an "OTHER" category.
+  * When set to true, categories beyond the limit are grouped into an "OTHER" category.
+  * Only applies when using column split and when there are more categories than the limit.
+
+* **usenull**: optional. Controls whether to group events without a column split (i.e. whose column split is null) into a separate "NULL" category.
+
+  * Default: true
+  * ``usenull`` only applies to the column split.
+  * The row split should always be a non-null value. Documents with null values in the row split will be ignored.
+  * When ``usenull=false``, events with a null column split are excluded from results.
+  * When ``usenull=true``, events with a null column split are grouped into a separate "NULL" category.
+
+* **nullstr**: optional. Specifies the category name for rows that do not contain the column split value.
+
+  * Default: "NULL"
+  * Only applies when ``usenull`` is set to true.
+
+* **otherstr**: optional. Specifies the category name for the "OTHER" category.
+
+  * Default: "OTHER"
+  * Only applies when ``useother`` is set to true and there are values beyond the limit.
+
+* **aggregation_function**: mandatory. The aggregation function to apply to the data.
+
+  * Currently, only a single aggregation function is supported.
+  * Available functions: aggregation functions supported by the `stats `_ command.
+
+* **by**: optional. Groups the results by either one field (row split) or two fields (row split and column split).
+
+  * ``limit``, ``useother``, and ``usenull`` apply to the column split.
+  * Results are returned as individual rows for each combination.
+  * If not specified, the aggregation is performed across all documents.
+
+* **over...by...**: optional. Alternative syntax for grouping by multiple fields.
+
+  * ``over <field1> by <field2>`` groups the results by both fields.
+  * Using ``over <field1>`` alone is equivalent to ``by <field1>``.
+
+Notes
+=====
+
+* The fields generated by column splitting are converted to strings so that they are compatible with ``nullstr`` and ``otherstr`` and can be used as column names once pivoted.
+* Documents with null values in fields used by the aggregation function are excluded from aggregation. For example, in ``chart avg(balance) over deptno, group``, documents where ``balance`` is null are excluded from the average calculation.
+* The aggregation metric appears as the last column in the result. Result columns are ordered as: [row-split] [column-split] [aggregation-metrics].
+
+Examples
+========
+
+Example 1: Basic aggregation without grouping
+---------------------------------------------
+
+This example calculates the average balance across all accounts.
+
+PPL query::
+
+    os> source=accounts | chart avg(balance)
+    fetched rows / total rows = 1/1
+    +--------------+
+    | avg(balance) |
+    |--------------|
+    | 20482.25     |
+    +--------------+
+
+Example 2: Group by single field
+--------------------------------
+
+This example calculates the count of accounts grouped by gender.
+
+PPL query::
+
+    os> source=accounts | chart count() by gender
+    fetched rows / total rows = 2/2
+    +--------+---------+
+    | gender | count() |
+    |--------+---------|
+    | F      | 1       |
+    | M      | 3       |
+    +--------+---------+
+
+Example 3: Using over and by for multiple field grouping
+--------------------------------------------------------
+
+This example shows the average balance grouped by both the gender and age fields. Note that the age column in the result is converted to string type.
+
+PPL query::
+
+    os> source=accounts | chart avg(balance) over gender by age
+    fetched rows / total rows = 4/4
+    +--------+-----+--------------+
+    | gender | age | avg(balance) |
+    |--------+-----+--------------|
+    | F      | 28  | 32838.0      |
+    | M      | 32  | 39225.0      |
+    | M      | 33  | 4180.0       |
+    | M      | 36  | 5686.0       |
+    +--------+-----+--------------+
+
+Example 4: Using basic limit functionality
+------------------------------------------
+
+This example limits the results to show only the top 1 age group. Note that the age column in the result is converted to string type.
+
+PPL query::
+
+    os> source=accounts | chart limit=1 count() over gender by age
+    fetched rows / total rows = 3/3
+    +--------+-------+---------+
+    | gender | age   | count() |
+    |--------+-------+---------|
+    | F      | OTHER | 1       |
+    | M      | 33    | 1       |
+    | M      | OTHER | 2       |
+    +--------+-------+---------+
+
+Example 5: Using limit with other parameters
+--------------------------------------------
+
+This example shows using limit with useother and a custom otherstr parameter.
+
+PPL query::
+
+    os> source=accounts | chart limit=top1 useother=true otherstr='minor_gender' count() over state by gender
+    fetched rows / total rows = 4/4
+    +-------+--------------+---------+
+    | state | gender       | count() |
+    |-------+--------------+---------|
+    | IL    | M            | 1       |
+    | MD    | M            | 1       |
+    | TN    | M            | 1       |
+    | VA    | minor_gender | 1       |
+    +-------+--------------+---------+
+
+Example 6: Using null parameters
+--------------------------------
+
+This example shows using usenull with a custom nullstr parameter.
+
+PPL query::
+
+    os> source=accounts | chart usenull=true nullstr='employer not specified' count() over firstname by employer
+    fetched rows / total rows = 4/4
+    +-----------+------------------------+---------+
+    | firstname | employer               | count() |
+    |-----------+------------------------+---------|
+    | Amber     | Pyrami                 | 1       |
+    | Dale      | employer not specified | 1       |
+    | Hattie    | Netagy                 | 1       |
+    | Nanette   | Quility                | 1       |
+    +-----------+------------------------+---------+
+
+Example 7: Using chart command with span
+----------------------------------------
+
+This example demonstrates using span to group ages into ranges.
+
+PPL query::
+
+    os> source=accounts | chart max(balance) by age span=10, gender
+    fetched rows / total rows = 2/2
+    +-----+--------+--------------+
+    | age | gender | max(balance) |
+    |-----+--------+--------------|
+    | 20  | F      | 32838        |
+    | 30  | M      | 39225        |
+    +-----+--------+--------------+
+
+Limitations
+===========
+* Only a single aggregation function is supported per chart command.
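+
+The category mapping described under ``limit``, ``useother``, ``usenull``, ``nullstr``, and ``otherstr`` above can be summarized in one rule. The following Java sketch is illustrative only; the ``categoryOf`` helper and its ``rank`` argument are hypothetical and not part of the plugin, but the logic mirrors the CASE expression the command generates::
+
+    public class ChartCategorySketch {
+      // Maps a raw column-split value to its display category. `rank` is assumed to be the
+      // category's 1-based position when ordered by its grand total (top) or reversed (bottom).
+      static String categoryOf(String value, int rank, int limit,
+          boolean useOther, boolean useNull, String otherStr, String nullStr) {
+        if (value == null) {
+          return useNull ? nullStr : null;   // usenull=false: the row is dropped
+        }
+        if (limit == 0 || rank <= limit) {
+          return value;                      // within the top/bottom K categories
+        }
+        return useOther ? otherStr : null;   // useother=false: the row is dropped
+      }
+
+      public static void main(String[] args) {
+        // With limit=1, only the best-ranked category keeps its own name.
+        System.out.println(categoryOf("web-01", 1, 1, true, true, "OTHER", "NULL")); // web-01
+        System.out.println(categoryOf("web-02", 2, 1, true, true, "OTHER", "NULL")); // OTHER
+        System.out.println(categoryOf(null, 0, 1, true, true, "OTHER", "NULL"));     // NULL
+      }
+    }
+
+With ``limit=0`` the sketch keeps every category's own name, matching the "show all categories" behavior described above.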
diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 697ec7e2c6e..04a31827579 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -119,6 +119,8 @@ The query start with search command and then flowing a set of command delimited - `reverse command `_ - `table command `_ + + - `chart command `_ - `timechart command `_ diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java new file mode 100644 index 00000000000..b325912e7d7 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -0,0 +1,331 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; +import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteChartCommandIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK); + loadIndex(Index.BANK_WITH_NULL_VALUES); + loadIndex(Index.OTELLOGS); + loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.EVENTS_NULL); + } + + @Test + public void testChartWithSingleGroupKey() throws IOException { + JSONObject result1 = + executeQuery(String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK)); + verifySchema(result1, schema("gender", "string"), schema("avg(balance)", "double")); + verifyDataRows(result1, rows("F", 40488), rows("M", 16377.25)); + JSONObject result2 = + executeQuery(String.format("source=%s | chart avg(balance) over gender", TEST_INDEX_BANK)); + assertJsonEquals(result1.toString(), result2.toString()); + } + + @Test + public void testChartWithMultipleGroupKeys() throws IOException { + JSONObject result1 = + executeQuery( + String.format("source=%s | chart avg(balance) over gender by age", TEST_INDEX_BANK)); + verifySchema( + result1, + schema("gender", "string"), + schema("age", "string"), + schema("avg(balance)", "double")); + verifyDataRows( + result1, + rows("F", "28", 32838), + rows("F", "39", 40540), + rows("M", "32", 39225), + rows("M", "33", 4180), + rows("M", "36", 11052), + rows("F", "34", 48086)); + JSONObject result2 = + executeQuery( + String.format("source=%s | chart avg(balance) by gender, age", TEST_INDEX_BANK)); + assertJsonEquals(result1.toString(), result2.toString()); + } + + @Test + public void testChartCombineOverByWithLimit0() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=0 avg(balance) over state by gender", TEST_INDEX_BANK)); + verifySchema( + result, + schema("state", "string"), + schema("gender", "string"), 
+ schema("avg(balance)", "double")); + verifyDataRows( + result, + rows("IL", "M", 39225.0), + rows("IN", "F", 48086.0), + rows("MD", "M", 4180.0), + rows("PA", "F", 40540.0), + rows("TN", "M", 5686.0), + rows("VA", "F", 32838.0), + rows("WA", "M", 16418.0)); + } + + @Test + public void testChartMaxBalanceByAgeSpan() throws IOException { + JSONObject result = + executeQuery( + String.format("source=%s | chart max(balance) by age span=10", TEST_INDEX_BANK)); + verifySchema(result, schema("age", "int"), schema("max(balance)", "bigint")); + verifyDataRows(result, rows(20, 32838), rows(30, 48086)); + } + + @Test + public void testChartMaxValueOverTimestampSpanWeekByCategory() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) over timestamp span=1week by category", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("timestamp", "timestamp"), + schema("category", "string"), + schema("max(value)", "int")); + // Data spans from 2025-07-28 to 2025-08-01, all within same week + verifyDataRows( + result, + rows("2025-07-28 00:00:00", "A", 9367), + rows("2025-07-28 00:00:00", "B", 9521), + rows("2025-07-28 00:00:00", "C", 9187), + rows("2025-07-28 00:00:00", "D", 8736)); + } + + @Test + public void testChartMaxValueOverCategoryByTimestampSpanWeek() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) over category by timestamp span=1week", + TEST_INDEX_TIME_DATA)); + verifySchema( + result, + schema("category", "string"), + schema("timestamp", "string"), + schema("max(value)", "int")); + // All data within same week span + verifyDataRows( + result, + rows("A", "2025-07-28 00:00:00", 9367), + rows("B", "2025-07-28 00:00:00", 9521), + rows("C", "2025-07-28 00:00:00", 9187), + rows("D", "2025-07-28 00:00:00", 8736)); + } + + @Test + public void testChartMaxValueByTimestampSpanDayAndWeek() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart max(value) by timestamp span=1day, @timestamp span=2weeks", + TEST_INDEX_TIME_DATA)); + // column split are converted to string in order to be compatible with nullstr and otherstr + verifySchema( + result, + schema("timestamp", "timestamp"), + schema("@timestamp", "string"), + schema("max(value)", "int")); + // Data grouped by day spans + verifyDataRows( + result, + rows("2025-07-28 00:00:00", "2025-07-28 00:00:00", 9367), + rows("2025-07-29 00:00:00", "2025-07-28 00:00:00", 9521), + rows("2025-07-30 00:00:00", "2025-07-28 00:00:00", 9234), + rows("2025-07-31 00:00:00", "2025-07-28 00:00:00", 9318), + rows("2025-08-01 00:00:00", "2025-07-28 00:00:00", 9015)); + } + + @Test + public void testChartLimit0WithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=0 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows( + result, + rows(0, "DEBUG", 5), + rows(0, "DEBUG2", 6), + rows(0, "DEBUG3", 7), + rows(0, "DEBUG4", 8), + rows(0, "ERROR", 17), + rows(0, "ERROR2", 18), + rows(0, "ERROR3", 19), + rows(0, "ERROR4", 20), + rows(0, "FATAL", 21), + rows(0, "FATAL2", 22), + rows(0, "FATAL3", 23), + rows(0, "FATAL4", 24), + rows(0, "INFO", 9), + rows(0, "INFO2", 10), + rows(0, "INFO3", 11), + rows(0, "INFO4", 12), + rows(0, "TRACE2", 2), + 
rows(0, "TRACE3", 3), + rows(0, "TRACE4", 4), + rows(0, "WARN", 13), + rows(0, "WARN2", 14), + rows(0, "WARN3", 15), + rows(0, "WARN4", 16), + rows(1, "ERROR", 17), + rows(1, "INFO", 9), + rows(1, "TRACE", 1)); + } + + @Test + public void testChartLimitTopWithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=top2 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows( + result, + rows(0, "ERROR", 17), + rows(0, "FATAL4", 24), + rows(0, "max_among_other", 23), + rows(1, "ERROR", 17), + rows(1, "max_among_other", 9)); + } + + @Test + public void testChartLimitBottomWithUseOther() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=bottom2 useother=false otherstr='other_small_not_shown'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("max(severityNumber)", "bigint")); + verifyDataRows(result, rows(1, "TRACE", 1), rows(0, "TRACE2", 2)); + } + + @Test + public void testChartLimitTopWithMinAgg() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart limit=top2 min(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + result, + schema("flags", "bigint"), + schema("severityText", "string"), + schema("min(severityNumber)", "bigint")); + verifyDataRowsInOrder( + result, + rows(0, "ERROR", 17), + rows(0, "FATAL4", 24), + rows(0, "OTHER", 2), + rows(1, "ERROR", 17), + rows(1, "OTHER", 1)); + } + + @Test + public void testChartUseNullTrueWithNullStr() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart nullstr='nil' avg(balance) over gender by age span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, + schema("gender", "string"), + schema("age", "string"), + schema("avg(balance)", "double")); + verifyDataRows( + result, rows("M", "30", 21702.5), rows("F", "30", 48086.0), rows("F", "20", 32838.0)); + } + + @Test + public void testChartWithNullAndLimit() throws IOException { + JSONObject result = + executeQuery("source=events_null | chart limit=3 count() over @timestamp span=1d by host"); + + verifySchema( + result, + schema("@timestamp", "timestamp"), + schema("host", "string"), + schema("count()", "bigint")); + + verifyDataRows( + result, + rows("2024-07-01 00:00:00", "db-01", 1), + rows("2024-07-01 00:00:00", "web-01", 2), + rows("2024-07-01 00:00:00", "web-02", 2), + rows("2024-07-01 00:00:00", "NULL", 1)); + } + + @Test + public void testChartUseNullFalseWithNullStr() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | chart usenull=false nullstr='not_shown' count() over gender by age" + + " span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, schema("gender", "string"), schema("age", "string"), schema("count()", "bigint")); + verifyDataRows(result, rows("M", "30", 4), rows("F", "30", 1), rows("F", "20", 1)); + } + + @Test + public void testChartNullsInRowSplitShouldBeIgnored() throws IOException { + JSONObject result = executeQuery("source=events_null | chart min(cpu_usage) by host region"); + verifySchema( + result, + schema("host", 
"string"), + schema("region", "string"), + schema("min(cpu_usage)", "double")); + verifyDataRows( + result, + rows("db-01", "eu-west", 42.1), + rows("web-01", "us-east", 45.2), + rows("web-02", "us-west", 38.7)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index ffeabe8a79b..0b103fd9fdc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -7,8 +7,10 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; @@ -31,6 +33,7 @@ public void init() throws Exception { enableCalcite(); setQueryBucketSize(1000); loadIndex(Index.BANK_WITH_STRING_VALUES); + loadIndex(Index.BANK_WITH_NULL_VALUES); loadIndex(Index.NESTED_SIMPLE); loadIndex(Index.TIME_TEST_DATA); loadIndex(Index.TIME_TEST_DATA2); @@ -1437,6 +1440,70 @@ public void testPushDownMinOrMaxAggOnDerivedField() throws IOException { TEST_INDEX_ACCOUNT))); } + @Test + public void testExplainChartWithSingleGroupKey() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("chart_single_group_key.yaml"), + explainQueryYaml( + String.format("source=%s | chart avg(balance) by gender", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("chart_with_integer_span.yaml"), + explainQueryYaml( + String.format("source=%s | chart max(balance) by age span=10", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("chart_with_timestamp_span.yaml"), + explainQueryYaml( + String.format( + "source=%s | chart count by @timestamp span=1day", TEST_INDEX_TIME_DATA))); + } + + @Test + public void testExplainChartWithMultipleGroupKeys() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("chart_multiple_group_keys.yaml"), + explainQueryYaml( + String.format("source=%s | chart avg(balance) over gender by age", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("chart_timestamp_span_and_category.yaml"), + explainQueryYaml( + String.format( + "source=%s | chart max(value) over timestamp span=1week by category", + TEST_INDEX_TIME_DATA))); + } + + @Test + public void testExplainChartWithLimits() throws IOException { + String expected = loadExpectedPlan("chart_with_limit.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | chart limit=0 avg(balance) over state by gender", TEST_INDEX_BANK))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("chart_use_other.yaml"), + explainQueryYaml( + String.format( + "source=%s | chart limit=2 useother=true otherstr='max_among_other'" + + " max(severityNumber) over flags by severityText", + TEST_INDEX_OTEL_LOGS))); + } + + @Test + public void testExplainChartWithNullStr() throws IOException { + String expected = 
loadExpectedPlan("chart_null_str.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | chart limit=10 usenull=true nullstr='nil' avg(balance) over gender by" + + " age span=10", + TEST_INDEX_BANK_WITH_NULL_VALUES))); + } + @Test public void testCasePushdownAsRangeQueryExplain() throws IOException { // CASE 1: Range - Metric diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java index 3b4ca27dab5..4d9352e9e87 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java @@ -183,27 +183,13 @@ public void testTimechartWithLimit() throws IOException { schema("host", "string"), schema("avg(cpu_usage)", "double")); - // Verify we have rows for web-01, web-02, and OTHER - boolean foundWeb01 = false; - boolean foundWeb02 = false; - boolean foundOther = false; - - for (int i = 0; i < result.getJSONArray("datarows").length(); i++) { - Object[] row = result.getJSONArray("datarows").getJSONArray(i).toList().toArray(); - String label = (String) row[1]; - - if ("web-01".equals(label)) { - foundWeb01 = true; - } else if ("web-02".equals(label)) { - foundWeb02 = true; - } else if ("OTHER".equals(label)) { - foundOther = true; - } - } - - assertTrue("web-01 not found in results", foundWeb01); - assertTrue("web-02 not found in results", foundWeb02); - assertTrue("OTHER category not found in results", foundOther); + verifyDataRows( + result, + rows("2024-07-01 00:00:00", "web-01", 45.2), + rows("2024-07-01 00:01:00", "OTHER", 38.7), + rows("2024-07-01 00:02:00", "web-01", 55.3), + rows("2024-07-01 00:03:00", "db-01", 42.1), + rows("2024-07-01 00:04:00", "OTHER", 41.8)); } @Test @@ -383,7 +369,7 @@ public void testTimechartWithLimitAndUseOther() throws IOException { if ("OTHER".equals(host)) { foundOther = true; - assertEquals(330.4, cpuUsage, 0.1); + assertEquals(41.3, cpuUsage, 0.1); } else if ("web-03".equals(host)) { foundWeb03 = true; assertEquals(55.3, cpuUsage, 0.1); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_multiple_group_keys.yaml new file mode 100644 index 00000000000..d692ba70f69 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_multiple_group_keys.yaml @@ -0,0 +1,36 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + 
LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[SAFE_CAST($t1)], gender=[$t0], age=[$t3], avg(balance)=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[gender, balance, age], FILTER->AND(IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["gender","balance","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[SAFE_CAST($t0)], expr#3=[IS NOT NULL($t2)], age=[$t2], avg(balance)=[$t1], $condition=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[gender, balance, age], FILTER->AND(IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[age, avg(balance)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["gender","balance","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml new file mode 100644 index 00000000000..70ca93b7ddf --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_null_str.yaml @@ -0,0 +1,40 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'nil', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], gender=[$t1], balance=[$t0], age0=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[balance, gender, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows 
between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], expr#11=[IS NOT NULL($t4)], age=[$t4], avg(balance)=[$t10], $condition=[$t11]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[10], expr#4=[null:NULL], expr#5=[SPAN($t2, $t3, $t4)], gender=[$t1], balance=[$t0], age0=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]], PushDownContext=[[PROJECT->[balance, gender, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml new file mode 100644 index 00000000000..b011edc42f5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[FILTER->AND(IS NOT NULL($4), IS NOT NULL($7)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml new file mode 100644 index 00000000000..ef2320d13c5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_timestamp_span_and_category.yaml @@ -0,0 +1,32 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + LogicalProject(timestamp=[$0], category=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], max(value)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(timestamp=[$1], category=[$0], 
max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + LogicalFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + LogicalProject(category=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + LogicalFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], timestamp=[$t0], category=[$t10], max(value)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp], FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[timestamp0, category, max(value)], SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"timestamp","boost":1.0}},{"exists":{"field":"value","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["category","value","timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp], FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},max(value)=MAX($1)), PROJECT->[category, max(value)]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"timestamp","boost":1.0}},{"exists":{"field":"value","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"category","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["category","value","timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"timestamp0":{"date_histogram":{"field":"timestamp","missing_bucket":false,"order":"asc","calendar_interval":"1w"}}}]},"aggregations":{"max(value)":{"max":{"field":"value"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_use_other.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_use_other.yaml new file mode 100644 index 00000000000..4ee0759468f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_use_other.yaml @@ -0,0 +1,31 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$0], severityText=[CASE(IS NULL($1), 'NULL', <=($5, 2), $1, 'max_among_other')], max(severityNumber)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + LogicalFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($163))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalProject(severityText=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(severityText=[$1], max(severityNumber)=[$2]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + LogicalFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($163))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], flags=[$t0], severityText=[$t10], max(severityNumber)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[severityText, flags, severityNumber], FILTER->AND(IS NOT NULL($1), IS NOT NULL($2)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},max(severityNumber)=MAX($2)), SORT->[1]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"flags","boost":1.0}},{"exists":{"field":"severityNumber","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["severityText","flags","severityNumber"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"flags":{"terms":{"field":"flags","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"max(severityNumber)":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], severityText=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]], PushDownContext=[[PROJECT->[severityText, flags, severityNumber], FILTER->AND(IS NOT NULL($1), IS NOT NULL($2)), FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},max(severityNumber)=MAX($2)), PROJECT->[severityText, max(severityNumber)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"flags","boost":1.0}},{"exists":{"field":"severityNumber","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"severityText","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["severityText","flags","severityNumber"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"flags":{"terms":{"field":"flags","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"severityText":{"terms":{"field":"severityText","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"max(severityNumber)":{"max":{"field":"severityNumber"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml new file mode 100644 index 00000000000..c47fb9dd438 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(age=[$0], max(balance)=[$1]) + LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) + LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) + LogicalFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},max(balance)=MAX($0)), SORT->[0], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"age","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age0":{"histogram":{"field":"age","missing_bucket":false,"order":"asc","interval":10.0}}}]},"aggregations":{"max(balance)":{"max":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml new file mode 100644 index 00000000000..389825459df --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[gender, balance, state], FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"state","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["gender","balance","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml new file mode 100644 index 00000000000..cdce175e83a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(@timestamp=[$0], count=[$1]) + LogicalAggregate(group=[{0}], count=[COUNT()]) + LogicalProject(@timestamp0=[SPAN($0, 1, 'd')]) + LogicalFilter(condition=[IS NOT NULL($0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp], FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml index e0d54375e15..0c34a565cdc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], t=[DATE_ADD($3, 1:INTERVAL DAY)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SCRIPT->IS NOT NULL(DATE_ADD($3, 1:INTERVAL DAY)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AyV7CiAgIm9wIjogewogICAgIm5hbWUiOiAiSVMgTk9UIE5VTEwiLAogICAgImtpbmQiOiAiSVNfTk9UX05VTEwiLAogICAgInN5bnRheCI6ICJQT1NURklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiREFURV9BREQiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3Bl
bnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXN0AA9MamF2YS91dGlsL01hcDt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQACVRJTUVTVEFNUH5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABJ0AAREYXRlc3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAZeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAbAAAAAHNxAH4AAAAAAAF3BAAAAAB4eHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0BQN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiU1BBTiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiREFURV9BREQiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogImQiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICJwcmVjaXNpb24iOiAtMQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09w
ZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzdAAPTGphdmEvdXRpbC9NYXA7eHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAlUSU1FU1RBTVB+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgASdAAERGF0ZXNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGXhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGwAAAABzcQB+AAAAAAABdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SCRIPT->IS NOT NULL(DATE_ADD($3, 1:INTERVAL DAY)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AyV7CiAgIm9wIjogewogICAgIm5hbWUiOiAiSVMgTk9UIE5VTEwiLAogICAgImtpbmQiOiAiSVNfTk9UX05VTEwiLAogICAgInN5bnRheCI6ICJQT1NURklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiREFURV9BREQiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXN0AA9MamF2YS91dGlsL01hcDt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQACVRJTUVTVEFNUH5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABJ0AAREYXRlc3
IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAZeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAbAAAAAHNxAH4AAAAAAAF3BAAAAAB4eHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0BQN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiU1BBTiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiREFURV9BREQiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogImQiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICJwcmVjaXNpb24iOiAtMQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzdAAPTGphdmEvdXRpbC9NYXA7eHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAlUSU1FU1RBTVB+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgASdAAERGF0ZXNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL0
9iamVjdDtMAAZ2YWx1ZXNxAH4AGXhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGwAAAABzcQB+AAAAAAABdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml index f212b4c8bfd..a315860aac9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) + LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) @@ -10,7 +10,7 @@ calcite: LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) + LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) @@ -19,19 +19,21 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) - EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) - EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[SUM($1)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], 
PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10]) + EnumerableSort(sort0=[$1], dir0=[DESC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_group_keys.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_group_keys.yaml new file mode 100644 index 00000000000..95e83cdcd19 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_group_keys.yaml @@ -0,0 +1,40 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + 
LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t4)], expr#20=[IS NOT NULL($t7)], expr#21=[AND($t19, $t20)], proj#0..18=[{exprs}], $condition=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t4)], expr#20=[IS NOT NULL($t7)], expr#21=[SAFE_CAST($t10)], expr#22=[IS NOT NULL($t21)], expr#23=[AND($t19, $t20, $t22)], proj#0..18=[{exprs}], $condition=[$t23]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_groups.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_groups.yaml new file mode 100644 index 00000000000..df3fd8391d5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_multiple_groups.yaml @@ -0,0 +1,37 @@ +logical: | + LogicalSystemLimit(fetch=[10000], 
type=[QUERY_SIZE_LIMIT], sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)]) + LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')]) + LogicalJoin(condition=[=($2, $3)], joinType=[left]) + LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)]) + LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$4], age=[$10], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + +physical: | + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10]) + EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left]) + EnumerableSort(sort0=[$2], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], gender=[$t0], age=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7]) + EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], expr#10=[SAFE_CAST($t1)], avg(balance)=[$t9], age=[$t10]) + EnumerableAggregate(group=[{4, 10}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_null_str.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_null_str.yaml new file mode 100644 index 00000000000..274186e377e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_null_str.yaml @@ -0,0 +1,40 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], 
sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'nil', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + LogicalProject(age=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$3], age0=[SPAN($5, 10, null:NULL)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['nil'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], gender=[$t0], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], expr#16=[IS NOT NULL($t4)], expr#17=[IS NOT NULL($t3)], expr#18=[AND($t16, $t17)], gender=[$t4], balance=[$t3], age0=[$t15], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank_with_null_values]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[SAFE_CAST($t1)], expr#5=[0], expr#6=[=($t3, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t2)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t3)], age=[$t4], avg(balance)=[$t10]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[10], expr#14=[null:NULL], expr#15=[SPAN($t5, $t13, $t14)], expr#16=[IS NOT NULL($t4)], expr#17=[IS NOT NULL($t3)], expr#18=[SAFE_CAST($t15)], expr#19=[IS NOT NULL($t18)], expr#20=[AND($t16, $t17, $t19)], gender=[$t4], balance=[$t3], age0=[$t15], $condition=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank_with_null_values]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group.yaml new file mode 100644 index 00000000000..208fdf99935 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group.yaml @@ -0,0 +1,15 @@ +logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT], sort0=[$0], dir0=[ASC]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(avg(balance)=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + +physical: | + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(balance)=[$t8], gender=[$t0]) + EnumerableAggregate(group=[{4}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group_key.yaml new file mode 100644 index 00000000000..8224f075819 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_single_group_key.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(gender=[$4], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], gender=[$t0], avg(balance)=[$t8]) + EnumerableAggregate(group=[{4}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t4)], expr#20=[IS NOT NULL($t7)], expr#21=[AND($t19, $t20)], proj#0..18=[{exprs}], $condition=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_timestamp_span_and_category.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_timestamp_span_and_category.yaml new file mode 100644 index 00000000000..76b833ce3f1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_timestamp_span_and_category.yaml @@ -0,0 +1,38 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + LogicalProject(timestamp=[$0], category=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], max(value)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(timestamp=[$1], category=[$0], max(value)=[$2]) + 
LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + LogicalFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + LogicalProject(category=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(category=[$0], max(value)=[$2]) + LogicalAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + LogicalProject(category=[$1], value=[$2], timestamp0=[SPAN($3, 1, 'w')]) + LogicalFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(value)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], timestamp=[$t0], category=[$t10], max(value)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], timestamp=[$t1], category=[$t0], max(value)=[$t2]) + EnumerableAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[1], expr#11=['w'], expr#12=[SPAN($t3, $t10, $t11)], expr#13=[IS NOT NULL($t3)], expr#14=[IS NOT NULL($t2)], expr#15=[AND($t13, $t14)], category=[$t1], value=[$t2], timestamp0=[$t12], $condition=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], category=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($2)]) + EnumerableAggregate(group=[{0, 2}], max(value)=[MAX($1)]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[1], expr#11=['w'], expr#12=[SPAN($t3, $t10, $t11)], expr#13=[IS NOT NULL($t3)], expr#14=[IS NOT NULL($t2)], expr#15=[IS NOT NULL($t1)], expr#16=[AND($t13, $t14, $t15)], category=[$t1], value=[$t2], timestamp0=[$t12], $condition=[$t16]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_use_other.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_use_other.yaml new file mode 100644 index 00000000000..027d0e30124 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_use_other.yaml @@ -0,0 +1,37 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$0], severityText=[CASE(IS NULL($1), 'NULL', <=($5, 2), $1, 'max_among_other')], max(severityNumber)=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + LogicalFilter(condition=[AND(IS NOT 
NULL($23), IS NOT NULL($163))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + LogicalProject(severityText=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(severityText=[$1], max(severityNumber)=[$2]) + LogicalAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + LogicalProject(flags=[$23], severityText=[$7], severityNumber=[$163]) + LogicalFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($163))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) + EnumerableAggregate(group=[{0, 1}], max(severityNumber)=[MAX($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[2], expr#8=[<=($t4, $t7)], expr#9=['max_among_other'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], flags=[$t0], severityText=[$t10], max(severityNumber)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], flags=[$t1], severityText=[$t0], max(severityNumber)=[$t2]) + EnumerableAggregate(group=[{7, 23}], max(severityNumber)=[MAX($163)]) + EnumerableCalc(expr#0..171=[{inputs}], expr#172=[IS NOT NULL($t23)], expr#173=[IS NOT NULL($t163)], expr#174=[AND($t172, $t173)], proj#0..171=[{exprs}], $condition=[$t174]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], severityText=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($2)]) + EnumerableAggregate(group=[{7, 23}], max(severityNumber)=[MAX($163)]) + EnumerableCalc(expr#0..171=[{inputs}], expr#172=[IS NOT NULL($t23)], expr#173=[IS NOT NULL($t163)], expr#174=[IS NOT NULL($t7)], expr#175=[AND($t172, $t173, $t174)], proj#0..171=[{exprs}], $condition=[$t175]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_otel_logs]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_integer_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_integer_span.yaml new file mode 100644 index 00000000000..5e3a5d0ba33 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_integer_span.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(age=[$0], max(balance)=[$1]) + LogicalAggregate(group=[{1}], max(balance)=[MAX($0)]) + LogicalProject(balance=[$7], age0=[SPAN($10, 10, null:NULL)]) + LogicalFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableAggregate(group=[{1}], max(balance)=[MAX($0)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[10], expr#20=[null:NULL], expr#21=[SPAN($t10, $t19, $t20)], expr#22=[IS NOT NULL($t10)], expr#23=[IS NOT NULL($t7)], expr#24=[AND($t22, $t23)], balance=[$t7], 
age0=[$t21], $condition=[$t24]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_limit.yaml new file mode 100644 index 00000000000..16aa3871687 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_limit.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], gender=[$4], balance=[$7]) + LogicalFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], state=[$t1], gender=[$t0], avg(balance)=[$t9]) + EnumerableAggregate(group=[{4, 9}], agg#0=[$SUM0($7)], agg#1=[COUNT($7)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t9)], expr#20=[IS NOT NULL($t7)], expr#21=[AND($t19, $t20)], proj#0..18=[{exprs}], $condition=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_timestamp_span.yaml new file mode 100644 index 00000000000..a8bd9d61f77 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/chart_with_timestamp_span.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC]) + LogicalProject(@timestamp=[$0], count=[$1]) + LogicalAggregate(group=[{0}], count=[COUNT()]) + LogicalProject(@timestamp0=[SPAN($0, 1, 'd')]) + LogicalFilter(condition=[IS NOT NULL($0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableAggregate(group=[{0}], count=[COUNT()]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[1], expr#11=['d'], expr#12=[SPAN($t0, $t10, $t11)], expr#13=[IS NOT NULL($t0)], @timestamp0=[$t12], $condition=[$t13]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml index ae966d7eea7..5aa55ca656b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) + LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), 
null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) @@ -10,7 +10,7 @@ calcite: LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) + LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) LogicalFilter(condition=[IS NOT NULL($1)]) LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) @@ -19,19 +19,21 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], avg(cpu_usage)=[SUM($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) - EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) - EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) - EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[SUM($2)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], expr#9=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $f2=[$t8], $condition=[$t9]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableLimit(fetch=[10]) + EnumerableSort(sort0=[$1], dir0=[DESC]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], 
expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], expr#9=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $f2=[$t8], $condition=[$t9]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 05fe2469b3f..d7272a47a0f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -46,6 +46,7 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; TRENDLINE: 'TRENDLINE'; +CHART: 'CHART'; TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; EXPAND: 'EXPAND'; @@ -77,6 +78,7 @@ RIGHT_HINT: 'HINT.RIGHT'; // COMMAND ASSIST KEYWORDS AS: 'AS'; BY: 'BY'; +OVER: 'OVER'; SOURCE: 'SOURCE'; INDEX: 'INDEX'; A: 'A'; @@ -93,6 +95,8 @@ COST: 'COST'; EXTENDED: 'EXTENDED'; OVERRIDE: 'OVERRIDE'; OVERWRITE: 'OVERWRITE'; +TOP_K: 'TOP'[0-9]+; +BOTTOM_K: 'BOTTOM'[0-9]+; // SORT FIELD KEYWORDS // TODO #3180: Fix broken sort functionality @@ -138,6 +142,8 @@ COUNTFIELD: 'COUNTFIELD'; SHOWCOUNT: 'SHOWCOUNT'; LIMIT: 'LIMIT'; USEOTHER: 'USEOTHER'; +OTHERSTR: 'OTHERSTR'; +NULLSTR: 'NULLSTR'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 66a7bee9da4..c67ca41b5f1 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -77,6 +77,7 @@ commands | flattenCommand | reverseCommand | regexCommand + | chartCommand | timechartCommand | rexCommand | replaceCommand @@ -287,6 +288,28 @@ reverseCommand : REVERSE ; +chartCommand + : CHART chartOptions* statsAggTerm (OVER rowSplit)? (BY columnSplit)? + | CHART chartOptions* statsAggTerm BY rowSplit (COMMA)? columnSplit + ; + +chartOptions + : LIMIT EQUAL integerLiteral + | LIMIT EQUAL (TOP_K | BOTTOM_K) + | USEOTHER EQUAL booleanLiteral + | OTHERSTR EQUAL stringLiteral + | USENULL EQUAL booleanLiteral + | NULLSTR EQUAL stringLiteral + ; + +rowSplit + : fieldExpression binOption* + ; + +columnSplit + : fieldExpression binOption* + ; + timechartCommand : TIMECHART timechartParameter* statsFunction (BY fieldExpression)? 
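// Editor's examples, not part of the patch (index and field names are assumptions):
// queries the chartCommand rule above accepts, covering both alternatives and the
// chartOptions prefix:
//   source=accounts | chart avg(balance) over gender by state
//   source=accounts | chart limit=top3 useother=true count() by gender, state
//   source=accounts | chart usenull=true nullstr='N/A' max(balance) over age span=10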
  ;
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
index 65323229162..09e9b4c77ed 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
@@ -42,6 +42,7 @@
 import org.antlr.v4.runtime.ParserRuleContext;
 import org.antlr.v4.runtime.Token;
 import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.TerminalNode;
 import org.apache.commons.lang3.tuple.Pair;
 import org.opensearch.sql.ast.EmptySourcePropagateVisitor;
 import org.opensearch.sql.ast.dsl.AstDSL;
@@ -72,6 +73,7 @@
 import org.opensearch.sql.ast.tree.Aggregation;
 import org.opensearch.sql.ast.tree.Append;
 import org.opensearch.sql.ast.tree.AppendCol;
+import org.opensearch.sql.ast.tree.Chart;
 import org.opensearch.sql.ast.tree.CountBin;
 import org.opensearch.sql.ast.tree.Dedupe;
 import org.opensearch.sql.ast.tree.DefaultBin;
@@ -427,16 +429,7 @@ private ReplacePair buildReplacePair(OpenSearchPPLParser.ReplacePairContext ctx)
   /** Stats command. */
   @Override
   public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) {
-    ImmutableList.Builder<UnresolvedExpression> aggListBuilder = new ImmutableList.Builder<>();
-    for (OpenSearchPPLParser.StatsAggTermContext aggCtx : ctx.statsAggTerm()) {
-      UnresolvedExpression aggExpression = internalVisitExpression(aggCtx.statsFunction());
-      String name =
-          aggCtx.alias == null
-              ? getTextInQuery(aggCtx)
-              : StringUtils.unquoteIdentifier(aggCtx.alias.getText());
-      Alias alias = new Alias(name, aggExpression);
-      aggListBuilder.add(alias);
-    }
+    List<UnresolvedExpression> aggregations = parseAggTerms(ctx.statsAggTerm());
 
     List<UnresolvedExpression> groupList =
         Optional.ofNullable(ctx.statsByClause())
@@ -461,7 +454,7 @@ public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) {
 
     Aggregation aggregation =
         new Aggregation(
-            aggListBuilder.build(),
+            aggregations,
             Collections.emptyList(),
             groupList,
             span,
@@ -609,60 +602,39 @@ public UnresolvedPlan visitBinCommand(BinCommandContext ctx) {
     UnresolvedExpression aligntime = null;
     UnresolvedExpression start = null;
     UnresolvedExpression end = null;
-
+    String errorFormat = "Duplicate %s parameter in bin command";
     // Process each bin option: detect duplicates and assign values in one shot
     for (OpenSearchPPLParser.BinOptionContext option : ctx.binOption()) {
+      UnresolvedExpression resolvedOption = internalVisitExpression(option);
       // SPAN parameter
       if (option.span != null) {
-        if (!seenParams.add("SPAN")) {
-          throw new IllegalArgumentException("Duplicate SPAN parameter in bin command");
-        }
-        span = internalVisitExpression(option.span);
+        checkParamDuplication(seenParams, option.SPAN(), errorFormat);
+        span = resolvedOption;
       }
-
       // BINS parameter
       if (option.bins != null) {
-        if (!seenParams.add("BINS")) {
-          throw new IllegalArgumentException("Duplicate BINS parameter in bin command");
-        }
-        bins = Integer.parseInt(option.bins.getText());
+        checkParamDuplication(seenParams, option.BINS(), errorFormat);
+        bins = (Integer) ((Literal) resolvedOption).getValue();
      }
-
      // MINSPAN parameter
      if (option.minspan != null) {
-        if (!seenParams.add("MINSPAN")) {
-          throw new IllegalArgumentException("Duplicate MINSPAN parameter in bin command");
-        }
-        minspan = internalVisitExpression(option.minspan);
+        checkParamDuplication(seenParams, option.MINSPAN(), errorFormat);
+        minspan = resolvedOption;
      }
-
      // ALIGNTIME parameter
      if (option.aligntime != null) {
-        if (!seenParams.add("ALIGNTIME")) {
-          throw new IllegalArgumentException("Duplicate ALIGNTIME parameter in bin command");
-        }
-        aligntime =
-            option.aligntime.EARLIEST() != null
-                ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("earliest")
-                : option.aligntime.LATEST() != null
-                    ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("latest")
-                    : internalVisitExpression(option.aligntime.literalValue());
+        checkParamDuplication(seenParams, option.ALIGNTIME(), errorFormat);
+        aligntime = resolvedOption;
      }
-
      // START parameter
      if (option.start != null) {
-        if (!seenParams.add("START")) {
-          throw new IllegalArgumentException("Duplicate START parameter in bin command");
-        }
-        start = internalVisitExpression(option.start);
+        checkParamDuplication(seenParams, option.START(), errorFormat);
+        start = resolvedOption;
      }
-
      // END parameter
      if (option.end != null) {
-        if (!seenParams.add("END")) {
-          throw new IllegalArgumentException("Duplicate END parameter in bin command");
-        }
-        end = internalVisitExpression(option.end);
+        checkParamDuplication(seenParams, option.END(), errorFormat);
+        end = resolvedOption;
      }
    }
@@ -691,6 +663,14 @@ public UnresolvedPlan visitBinCommand(BinCommandContext ctx) {
     }
   }
 
+  private void checkParamDuplication(
+      Set<String> seenParams, TerminalNode terminalNode, String errorFormat) {
+    String paramName = terminalNode.getText();
+    if (!seenParams.add(paramName)) {
+      throw new IllegalArgumentException(StringUtils.format(errorFormat, paramName));
+    }
+  }
+
   /** Sort command. */
   @Override
   public UnresolvedPlan visitSortCommand(SortCommandContext ctx) {
@@ -728,6 +708,38 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont
     return new Reverse();
   }
 
+  /** Chart command. */
+  @Override
+  public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) {
+    UnresolvedExpression rowSplit =
+        ctx.rowSplit() == null ? null : internalVisitExpression(ctx.rowSplit());
+    UnresolvedExpression columnSplit =
+        ctx.columnSplit() == null ? null : internalVisitExpression(ctx.columnSplit());
+    List<Argument> arguments = ArgumentFactory.getArgumentList(ctx);
+    UnresolvedExpression aggFunction = parseAggTerms(List.of(ctx.statsAggTerm())).getFirst();
+    return Chart.builder()
+        .rowSplit(rowSplit)
+        .columnSplit(columnSplit)
+        .aggregationFunction(aggFunction)
+        .arguments(arguments)
+        .build();
+  }
+
+  private List<UnresolvedExpression> parseAggTerms(
+      List<OpenSearchPPLParser.StatsAggTermContext> statsAggTermContexts) {
+    ImmutableList.Builder<UnresolvedExpression> aggListBuilder = new ImmutableList.Builder<>();
+    for (OpenSearchPPLParser.StatsAggTermContext aggCtx : statsAggTermContexts) {
+      UnresolvedExpression aggExpression = internalVisitExpression(aggCtx.statsFunction());
+      String name =
+          aggCtx.alias == null
+              ? getTextInQuery(aggCtx)
+              : StringUtils.unquoteIdentifier(aggCtx.alias.getText());
+      Alias alias = new Alias(name, aggExpression);
+      aggListBuilder.add(alias);
+    }
+    return aggListBuilder.build();
+  }
+
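// Editor's illustrative sketch, not part of the patch (index and field names are
// assumptions): for a query such as
//   source=accounts | chart limit=top3 avg(balance) over gender by state
// visitChartCommand above yields a Chart node whose aggregationFunction is the
// aliased avg(balance), whose rowSplit/columnSplit are the aliased gender/state
// fields, and whose arguments are [limit=3, top=true], the "3" in the TOP_K token
// being extracted by ArgumentFactory.getArgumentList.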
*/ @Override public UnresolvedPlan visitTimechartCommand(OpenSearchPPLParser.TimechartCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 4a5230d356e..5dc1bf44d86 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -1030,4 +1030,58 @@ public UnresolvedExpression visitTimeModifierExpression( : SearchComparison.Operator.LESS_OR_EQUAL; return new SearchComparison(implicitTimestampField, operator, osDateMathLiteral); } + + @Override + public UnresolvedExpression visitBinOption(OpenSearchPPLParser.BinOptionContext ctx) { + UnresolvedExpression option; + if (ctx.span != null) { + option = visit(ctx.span); + } else if (ctx.bins != null) { + option = visit(ctx.bins); + } else if (ctx.minspan != null) { + option = visit(ctx.minspan); + } else if (ctx.aligntime != null) { + option = + ctx.aligntime.EARLIEST() != null + ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("earliest") + : ctx.aligntime.LATEST() != null + ? org.opensearch.sql.ast.dsl.AstDSL.stringLiteral("latest") + : visit(ctx.aligntime.literalValue()); + } else if (ctx.start != null) { + option = visit(ctx.start); + } else if (ctx.end != null) { + option = visit(ctx.end); + } else { + throw new SyntaxCheckException(StringUtils.format("Unknown bin option: %s", ctx.getText())); + } + return option; + } + + @Override + public UnresolvedExpression visitRowSplit(OpenSearchPPLParser.RowSplitContext ctx) { + // TODO: options ignored for now + Field field = (Field) visit(ctx.fieldExpression()); + for (var option : ctx.binOption()) { + if (option.span != null) { + return AstDSL.alias( + field.getField().toString(), + AstDSL.spanFromSpanLengthLiteral(field, (Literal) visit(option.binSpanValue()))); + } + } + return AstDSL.alias(ctx.fieldExpression().getText(), field); + } + + @Override + public UnresolvedExpression visitColumnSplit(OpenSearchPPLParser.ColumnSplitContext ctx) { + Field field = (Field) visit(ctx.fieldExpression()); + for (var option : ctx.binOption()) { + if (option.span != null) { + return AstDSL.alias( + field.getField().toString(), + AstDSL.spanFromSpanLengthLiteral(field, (Literal) visit(option.binSpanValue()))); + } + } + // TODO: options ignored for now + return AstDSL.alias(ctx.fieldExpression().getText(), field); + } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index acf204e8030..41e9e91535b 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Optional; import org.antlr.v4.runtime.ParserRuleContext; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; @@ -21,6 +22,7 @@ import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.BooleanLiteralContext; +import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.ChartCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext; import 
org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DefaultSortFieldContext; @@ -199,6 +201,37 @@ private static Argument getTypeArgument(OpenSearchPPLParser.SortFieldExpressionC } } + public static List<Argument> getArgumentList(ChartCommandContext ctx) { + List<Argument> arguments = new ArrayList<>(); + for (var optionCtx : ctx.chartOptions()) { + if (optionCtx.LIMIT() != null) { + Literal limit; + if (optionCtx.integerLiteral() != null) { + limit = getArgumentValue(optionCtx.integerLiteral()); + } else { + limit = + AstDSL.intLiteral( + Integer.parseInt( + (optionCtx.TOP_K() != null ? optionCtx.TOP_K() : optionCtx.BOTTOM_K()) + .getText() + .replaceAll("[^0-9-]", ""))); + } + arguments.add(new Argument("limit", limit)); + // not specified or top present -> true; bottom present -> false + arguments.add(new Argument("top", AstDSL.booleanLiteral(optionCtx.BOTTOM_K() == null))); + } else if (optionCtx.USEOTHER() != null) { + arguments.add(new Argument("useother", getArgumentValue(optionCtx.booleanLiteral()))); + } else if (optionCtx.OTHERSTR() != null) { + arguments.add(new Argument("otherstr", getArgumentValue(optionCtx.stringLiteral()))); + } else if (optionCtx.USENULL() != null) { + arguments.add(new Argument("usenull", getArgumentValue(optionCtx.booleanLiteral()))); + } else if (optionCtx.NULLSTR() != null) { + arguments.add(new Argument("nullstr", getArgumentValue(optionCtx.stringLiteral()))); + } + } + return arguments; + } + /** * Get list of {@link Argument}. * diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 5b599ae162c..0971924295a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -56,6 +56,7 @@ import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.Bin; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.DefaultBin; @@ -531,6 +532,42 @@ public String visitTimechart(Timechart node, String context) { return StringUtils.format("%s%s", child, timechartCommand.toString()); } + @Override + public String visitChart(Chart node, String context) { + String child = node.getChild().get(0).accept(this, context); + StringBuilder chartCommand = new StringBuilder(); + chartCommand.append(" | chart"); + + for (Argument arg : node.getArguments()) { + String argName = arg.getArgName(); + // Skip the auto-generated "top" parameter that's added when limit is specified + if ("top".equals(argName)) { + continue; + } + if ("limit".equals(argName) || "useother".equals(argName) || "usenull".equals(argName)) { + chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + } else if ("otherstr".equals(argName) || "nullstr".equals(argName)) { + chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + } + } + + chartCommand.append(" ").append(visitExpression(node.getAggregationFunction())); + + if (node.getRowSplit() != null && node.getColumnSplit() != null) { + chartCommand + .append(" by ") + .append(visitExpression(node.getRowSplit())) + .append(" ") + .append(visitExpression(node.getColumnSplit())); + } else if (node.getRowSplit() != null) { + chartCommand.append(" by
").append(visitExpression(node.getRowSplit())); + } else if (node.getColumnSplit() != null) { + chartCommand.append(" by ").append(visitExpression(node.getColumnSplit())); + } + + return StringUtils.format("%s%s", child, chartCommand.toString()); + } + public String visitRex(Rex node, String context) { String child = node.getChild().get(0).accept(this, context); String field = visitExpression(node.getField()); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java new file mode 100644 index 00000000000..338b586ba29 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java @@ -0,0 +1,412 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; +import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; +import org.opensearch.sql.ppl.parser.AstBuilder; + +public class CalcitePPLChartTest extends CalcitePPLAbstractTest { + + public CalcitePPLChartTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... 
schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Add events table for chart tests - similar to bank data used in integration tests + ImmutableList<Object[]> rows = + ImmutableList.of( + new Object[] {32838, "F", 28, "VA", java.sql.Timestamp.valueOf("2024-07-01 00:00:00")}, + new Object[] {40540, "F", 39, "PA", java.sql.Timestamp.valueOf("2024-07-01 00:01:00")}, + new Object[] {39225, "M", 32, "IL", java.sql.Timestamp.valueOf("2024-07-01 00:02:00")}, + new Object[] {4180, "M", 33, "MD", java.sql.Timestamp.valueOf("2024-07-01 00:03:00")}, + new Object[] {11052, "M", 36, "WA", java.sql.Timestamp.valueOf("2024-07-01 00:04:00")}, + new Object[] {48086, "F", 34, "IN", java.sql.Timestamp.valueOf("2024-07-01 00:05:00")}); + schema.add("bank", new BankTable(rows)); + + // Add time_data table for span tests + ImmutableList<Object[]> timeRows = + ImmutableList.of( + new Object[] {java.sql.Timestamp.valueOf("2025-07-28 00:00:00"), "A", 9367}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-29 00:00:00"), "B", 9521}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-30 00:00:00"), "C", 9187}, + new Object[] {java.sql.Timestamp.valueOf("2025-07-31 00:00:00"), "D", 8736}, + new Object[] {java.sql.Timestamp.valueOf("2025-08-01 00:00:00"), "A", 9015}); + schema.add("time_data", new TimeDataTable(timeRows)); + + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List<RelTraitDef>) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testChartWithSingleGroupKey() { + String ppl = "source=bank | chart avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithOverSyntax() { + String ppl = "source=bank | chart avg(balance) over gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithMultipleGroupKeys() { + String ppl = "source=bank | chart avg(balance) over gender by age"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t2`.`gender`, CASE WHEN `t2`.`age` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`age` ELSE 'OTHER' END `age`," + + " AVG(`t2`.`avg(balance)`) `avg(balance)`\n" + + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + " `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`, `age`) `t2`\n" + + "LEFT JOIN (SELECT `age`, SUM(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY SUM(`avg(balance)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`, `age`) `t6`\n" + + "WHERE `age` IS NOT NULL\n" + + "GROUP BY `age`) `t9` ON `t2`.`age`
= `t9`.`age`\n" + + "GROUP BY `t2`.`gender`, CASE WHEN `t2`.`age` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`age` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`gender` NULLS LAST, 2 NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithMultipleGroupKeysAlternativeSyntax() { + String ppl = "source=bank | chart avg(balance) by gender, age"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t2`.`gender`, CASE WHEN `t2`.`age` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`age` ELSE 'OTHER' END `age`," + + " AVG(`t2`.`avg(balance)`) `avg(balance)`\n" + + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + " `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`, `age`) `t2`\n" + + "LEFT JOIN (SELECT `age`, SUM(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY SUM(`avg(balance)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`, `age`) `t6`\n" + + "WHERE `age` IS NOT NULL\n" + + "GROUP BY `age`) `t9` ON `t2`.`age` = `t9`.`age`\n" + + "GROUP BY `t2`.`gender`, CASE WHEN `t2`.`age` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`age` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`gender` NULLS LAST, 2 NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithLimit() { + String ppl = "source=bank | chart limit=2 avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithLimitZero() { + String ppl = "source=bank | chart limit=0 avg(balance) over state by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `state`, `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `state` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `state`, `gender`\n" + + "ORDER BY `state` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithSpan() { + String ppl = "source=bank | chart max(balance) by age span=10"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `SPAN`(`age`, 10, NULL) `age`, MAX(`balance`) `max(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `age` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `SPAN`(`age`, 10, NULL)\n" + + "ORDER BY 1 NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithTimeSpan() { + String ppl = "source=time_data | chart max(value) over timestamp span=1week by category"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `t2`.`timestamp`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END `category`," + + " MAX(`t2`.`max(value)`) `max(value)`\n" + + "FROM (SELECT `SPAN`(`timestamp`, 1, 'w') `timestamp`, `category`, MAX(`value`)" + + " `max(value)`\n" + + "FROM `scott`.`time_data`\n" + + "WHERE 
`timestamp` IS NOT NULL AND `value` IS NOT NULL\n" + + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t2`\n" + + "LEFT JOIN (SELECT `category`, SUM(`max(value)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY SUM(`max(value)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `category`, MAX(`value`) `max(value)`\n" + + "FROM `scott`.`time_data`\n" + + "WHERE `timestamp` IS NOT NULL AND `value` IS NOT NULL\n" + + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t6`\n" + + "WHERE `category` IS NOT NULL\n" + + "GROUP BY `category`) `t9` ON `t2`.`category` = `t9`.`category`\n" + + "GROUP BY `t2`.`timestamp`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`timestamp` NULLS LAST, 2 NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseOtherTrue() { + String ppl = "source=bank | chart useother=true avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseOtherFalse() { + String ppl = "source=bank | chart useother=false limit=2 avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithOtherStr() { + String ppl = "source=bank | chart limit=1 otherstr='other_values' avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithNullStr() { + String ppl = "source=bank | chart nullstr='null_values' avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testChartWithUseNull() { + String ppl = "source=bank | chart usenull=false avg(balance) by gender"; + + RelNode root = getRelNode(ppl); + String expectedSparkSql = + "SELECT `gender`, AVG(`balance`) `avg(balance)`\n" + + "FROM `scott`.`bank`\n" + + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + + "GROUP BY `gender`\n" + + "ORDER BY `gender` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + private UnresolvedPlan parsePPL(String query) { + PPLSyntaxParser parser = new PPLSyntaxParser(); + AstBuilder astBuilder = new AstBuilder(query); + return astBuilder.visit(parser.parse(query)); + } + + @RequiredArgsConstructor + public static class BankTable implements ScannableTable { + private final ImmutableList<Object[]> rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() +
.add("balance", SqlTypeName.INTEGER) + .nullable(true) + .add("gender", SqlTypeName.VARCHAR) + .nullable(true) + .add("age", SqlTypeName.INTEGER) + .nullable(true) + .add("state", SqlTypeName.VARCHAR) + .nullable(true) + .add("timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } + + @RequiredArgsConstructor + public static class TimeDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("timestamp", SqlTypeName.TIMESTAMP) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.INTEGER) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java index ee6b82f2d85..c3ed1ebceea 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java @@ -260,13 +260,13 @@ public void testTimechartWithSpan1m() { RelNode root = getRelNode(ppl); String expectedSparkSql = "SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`region` IS NOT NULL THEN `t1`.`region` ELSE CASE" - + " WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, SUM(`t1`.`$f2`)" + + " WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, AVG(`t1`.`$f2`)" + " `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t1`\n" - + "LEFT JOIN (SELECT `region`, SUM(`$f2`) `grand_total`\n" + + "LEFT JOIN (SELECT `region`, AVG(`$f2`) `grand_total`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" @@ -297,13 +297,13 @@ public void testTimechartWithLimitAndUseOtherFalse() { RelNode root = getRelNode(ppl); String 
expectedSparkSql = "SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE" - + " WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, SUM(`t1`.`$f2`)" + + " WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, AVG(`t1`.`$f2`)" + " `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t1`\n" - + "LEFT JOIN (SELECT `host`, SUM(`$f2`) `grand_total`\n" + + "LEFT JOIN (SELECT `host`, AVG(`$f2`) `grand_total`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" + " `$f2`\n" + "FROM `scott`.`events`\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index a1823e4befe..f1464e31065 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -67,6 +67,7 @@ import org.mockito.Mockito; import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; @@ -74,6 +75,7 @@ import org.opensearch.sql.ast.expression.PatternMode; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -1488,4 +1490,78 @@ public void testReplaceCommandWithMultiplePairs() { // Test multiple pattern/replacement pairs plan("source=t | replace 'a' WITH 'A', 'b' WITH 'B' IN field"); } + + @Test + public void testChartCommandBasic() { + assertEqual( + "source=t | chart count() by age", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("age", field("age"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments(emptyList()) + .build()); + } + + @Test + public void testChartCommandWithRowSplit() { + assertEqual( + "source=t | chart count() over status by age", + Chart.builder() + .child(relation("t")) + .rowSplit(alias("status", field("status"))) + .columnSplit(alias("age", field("age"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments(emptyList()) + .build()); + } + + @Test + public void testChartCommandWithOptions() { + assertEqual( + "source=t | chart limit=10 useother=true count() by status", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("status", field("status"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(true)))) + .build()); + } + + @Test + public void testChartCommandWithAllOptions() { + assertEqual( + "source=t | chart limit=top5 useother=false otherstr='OTHER' usenull=true nullstr='NULL'" + + " avg(balance) by gender", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("gender", field("gender"))) + .aggregationFunction(alias("avg(balance)", aggregate("avg", field("balance")))) + .arguments( + exprList( + argument("limit", intLiteral(5)), + argument("top", booleanLiteral(true)), + 
argument("useother", booleanLiteral(false)), + argument("otherstr", stringLiteral("OTHER")), + argument("usenull", booleanLiteral(true)), + argument("nullstr", stringLiteral("NULL")))) + .build()); + } + + @Test + public void testChartCommandWithBottomLimit() { + assertEqual( + "source=t | chart limit=bottom3 count() by category", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("category", field("category"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) + .build()); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 6b0e0a081f8..5d10960ea6b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -14,6 +14,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.allFields; import static org.opensearch.sql.ast.dsl.AstDSL.and; import static org.opensearch.sql.ast.dsl.AstDSL.argument; +import static org.opensearch.sql.ast.dsl.AstDSL.bin; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.caseWhen; import static org.opensearch.sql.ast.dsl.AstDSL.cast; @@ -1605,4 +1606,66 @@ public void testVisitSpanLiteral() { .useOther(true) .build()); } + + @Test + public void testBinOptionWithSpan() { + assertEqual( + "source=t | bin age span=10", + bin(relation("t"), field("age"), argument("span", intLiteral(10)))); + } + + @Test + public void testBinOptionWithBins() { + assertEqual( + "source=t | bin age bins=5", + bin(relation("t"), field("age"), argument("bins", intLiteral(5)))); + } + + @Test + public void testBinOptionWithMinspan() { + assertEqual( + "source=t | bin age minspan=100", + bin(relation("t"), field("age"), argument("minspan", intLiteral(100)))); + } + + @Test + public void testBinOptionWithAligntimeEarliest() { + assertEqual( + "source=t | bin age span=10 aligntime=earliest", + bin( + relation("t"), + field("age"), + argument("span", intLiteral(10)), + argument("aligntime", stringLiteral("earliest")))); + } + + @Test + public void testBinOptionWithAligntimeLiteralValue() { + assertEqual( + "source=t | bin age span=10 aligntime=1000", + bin( + relation("t"), + field("age"), + argument("span", intLiteral(10)), + argument("aligntime", intLiteral(1000)))); + } + + @Test + public void testBinOptionWithStartAndEnd() { + assertEqual( + "source=t | bin age bins=10 start=0 end=100", + bin( + relation("t"), + field("age"), + argument("bins", intLiteral(10)), + argument("start", intLiteral(0)), + argument("end", intLiteral(100)))); + } + + @Test + public void testBinOptionWithTimeSpan() { + assertEqual( + "source=t | bin timestamp span=1h", + bin(relation("t"), field("timestamp"), argument("span", stringLiteral("1h")))); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java index adb9ec719e6..dc2a9d66061 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/ArgumentFactoryTest.java @@ -21,7 +21,9 @@ import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import org.junit.Test; +import org.opensearch.sql.ast.expression.AllFields; import 
org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ppl.parser.AstBuilderTest; public class ArgumentFactoryTest extends AstBuilderTest { @@ -100,6 +102,39 @@ public void testSortFieldArgument() { argument("type", stringLiteral("auto")))))); } + @Test + public void testChartCommandArguments() { + assertEqual( + "source=t | chart limit=5 useother=true otherstr='OTHER_VAL' usenull=false" + + " nullstr='NULL_VAL' count() by age", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("age", field("age"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList( + argument("limit", intLiteral(5)), + argument("top", booleanLiteral(true)), + argument("useother", booleanLiteral(true)), + argument("otherstr", stringLiteral("OTHER_VAL")), + argument("usenull", booleanLiteral(false)), + argument("nullstr", stringLiteral("NULL_VAL")))) + .build()); + } + + @Test + public void testChartCommandBottomArguments() { + assertEqual( + "source=t | chart limit=bottom3 count() by status", + Chart.builder() + .child(relation("t")) + .columnSplit(alias("status", field("status"))) + .aggregationFunction(alias("count()", aggregate("count", AllFields.of()))) + .arguments( + exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) + .build()); + } + @Test public void testNoArgConstructorForArgumentFactoryShouldPass() { new ArgumentFactory(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 48f6c45b4c6..ec87000b5bf 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -260,6 +260,34 @@ public void testTimechartCommand() { anonymize("source=t | timechart count() by host")); } + @Test + public void testChartCommand() { + assertEquals( + "source=table | chart count(identifier) by identifier identifier", + anonymize("source=t | chart count(age) by gender country")); + } + + @Test + public void testChartCommandWithParameters() { + assertEquals( + "source=table | chart limit=*** useother=*** avg(identifier) by identifier", + anonymize("source=t | chart limit=5 useother=false avg(balance) by state")); + } + + @Test + public void testChartCommandOver() { + assertEquals( + "source=table | chart avg(identifier) by identifier", + anonymize("source=t | chart avg(balance) over gender")); + } + + @Test + public void testChartCommandOverBy() { + assertEquals( + "source=table | chart sum(identifier) by identifier identifier", + anonymize("source=t | chart sum(amount) over gender by age")); + } + // todo, sort order is ignored, it doesn't impact the log analysis. 
@Test public void testSortCommandWithOptions() { From b8399049d81e6bc0794150de6d79526f7d524e07 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 10 Nov 2025 13:47:04 +0800 Subject: [PATCH 36/99] Support push down sort on aggregation measure for more than one agg calls (#4759) Signed-off-by: Lantao Jin --- .../calcite/plan/OpenSearchRuleConfig.java | 23 +++ .../calcite/plan/PPLAggGroupMergeRule.java | 2 +- .../calcite/plan/PPLAggregateConvertRule.java | 2 +- .../sql/calcite/utils/CalciteToolsHelper.java | 5 + .../calcite/clickbench/PPLClickBenchIT.java | 2 +- .../sql/calcite/remote/CalciteExplainIT.java | 33 +++- .../opensearch/sql/ppl/StatsCommandIT.java | 51 ++++++ .../calcite/clickbench/q10.yaml | 5 +- .../calcite/clickbench/q23.yaml | 5 +- .../calcite/clickbench/q31.yaml | 5 +- .../calcite/clickbench/q32.yaml | 5 +- .../calcite/clickbench/q33.yaml | 5 +- .../explain_agg_sort_on_measure_complex1.yaml | 11 ++ .../explain_agg_sort_on_measure_complex2.yaml | 12 ++ ...t_on_measure_multi_buckets_not_pushed.yaml | 13 ++ ...gg_with_sort_on_one_measure_not_push1.yaml | 6 +- .../planner/rules/AggregateIndexScanRule.java | 3 +- .../planner/rules/DedupPushdownRule.java | 3 +- .../ExpandCollationOnProjectExprRule.java | 3 +- .../planner/rules/FilterIndexScanRule.java | 3 +- .../planner/rules/LimitIndexScanRule.java | 3 +- .../planner/rules/ProjectIndexScanRule.java | 3 +- .../planner/rules/RareTopPushdownRule.java | 3 +- .../rules/RelevanceFunctionPushdownRule.java | 3 +- .../rules/SortAggregateMeasureRule.java | 3 +- .../planner/rules/SortIndexScanRule.java | 3 +- .../rules/SortProjectExprTransposeRule.java | 3 +- .../request/OpenSearchRequestBuilder.java | 10 +- .../storage/scan/CalciteLogicalIndexScan.java | 8 +- .../scan/context/AggPushDownAction.java | 162 +++++++----------- .../storage/scan/context/PushDownContext.java | 2 +- 31 files changed, 254 insertions(+), 146 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRuleConfig.java create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRuleConfig.java b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRuleConfig.java new file mode 100644 index 00000000000..ca031afbb51 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/OpenSearchRuleConfig.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.plan; + +import org.apache.calcite.plan.Contexts; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.tools.RelBuilderFactory; +import org.immutables.value.Value; +import org.opensearch.sql.calcite.utils.CalciteToolsHelper; + +public interface OpenSearchRuleConfig extends RelRule.Config { + + /** Return a custom RelBuilderFactory for creating OpenSearchRelBuilder */ + @Override + @Value.Default + default RelBuilderFactory relBuilderFactory() { + return CalciteToolsHelper.proto(Contexts.of(RelFactories.DEFAULT_STRUCT)); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java 
b/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java index f1671e0eb63..019de7fec01 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggGroupMergeRule.java @@ -105,7 +105,7 @@ public void apply(RelOptRuleCall call, LogicalAggregate aggregate, LogicalProjec /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { Config GROUP_MERGE = ImmutablePPLAggGroupMergeRule.Config.builder() .build() diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java index 2f385054482..c3069d85668 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/PPLAggregateConvertRule.java @@ -240,7 +240,7 @@ private RexNode aliasMaybe(RelBuilder builder, RexNode node, String alias) { /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { Config SUM_CONVERTER = ImmutablePPLAggregateConvertRule.Config.builder() .build() diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java index 6513f51a5a3..67f86dede7e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java @@ -83,6 +83,7 @@ import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.tools.RelRunner; import org.apache.calcite.util.Holder; import org.apache.calcite.util.Util; @@ -127,6 +128,10 @@ public static Connection connect(FrameworkConfig config, JavaTypeFactory typeFac } } + public static RelBuilderFactory proto(final Context context) { + return (cluster, schema) -> new OpenSearchRelBuilder(context, cluster, schema); + } + /** * This method copied from {@link Frameworks#withPrepare(FrameworkConfig, * Frameworks.BasePrepareAction)}. 
The purpose is the method {@link diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java index 07831b4cc09..4f557e70cc8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java @@ -70,12 +70,12 @@ public void test() throws IOException { } logger.info("Running Query{}", i); String ppl = sanitize(loadFromFile("clickbench/queries/q" + i + ".ppl")); - timing(summary, "q" + i, ppl); // V2 gets unstable scripts, ignore them when comparing plan if (isCalciteEnabled()) { String expected = loadExpectedPlan("clickbench/q" + i + ".yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } + timing(summary, "q" + i, ppl); } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 0b103fd9fdc..994e583eaa8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1186,6 +1186,24 @@ public void testExplainSortOnMeasureMultiTermsWithScript() throws IOException { + " sort `count()`")); } + @Test + public void testExplainSortOnMeasureComplex() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_agg_sort_on_measure_complex1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats bucket_nullable=false sum(balance)," + + " count() as c, dc(employer) by state | sort - c")); + expected = loadExpectedPlan("explain_agg_sort_on_measure_complex2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_state = lower(state) | stats" + + " bucket_nullable=false sum(balance), count(), dc(employer) as d by gender," + + " new_state | sort - d")); + } + @Test public void testExplainCompositeMultiBucketsAutoDateThenSortOnMeasureNotPushdown() throws IOException { @@ -1238,7 +1256,7 @@ public void testExplainCompositeRangeAutoDateThenSortOnMeasureNotPushdown() thro } @Test - public void testExplainMultipleAggregatorsWithSortOnOneMeasureNotPushDown() throws IOException { + public void testExplainMultipleCollationsWithSortOnOneMeasureNotPushDown() throws IOException { enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml"); @@ -1246,7 +1264,7 @@ public void testExplainMultipleAggregatorsWithSortOnOneMeasureNotPushDown() thro expected, explainQueryYaml( "source=opensearch-sql_test_index_account | stats bucket_nullable=false count() as c," - + " sum(balance) as s by state | sort c")); + + " sum(balance) as s by state | sort c, state")); expected = loadExpectedPlan("explain_multiple_agg_with_sort_on_one_measure_not_push2.yaml"); assertYamlEqualsIgnoreId( expected, @@ -1255,6 +1273,17 @@ public void testExplainMultipleAggregatorsWithSortOnOneMeasureNotPushDown() thro + " sum(balance) as s by state | sort c, s")); } + @Test + public void testExplainSortOnMeasureMultiBucketsNotMultiTermsNotPushDown() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml"); + 
assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats bucket_nullable=false count() as c," + + " sum(balance) as s by state, span(age, 5) | sort c")); + } + @Test public void testExplainEvalMax() throws IOException { String expected = loadExpectedPlan("explain_eval_max.json"); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java index 3e0b93bc7cd..eb6573c27b6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java @@ -1174,4 +1174,55 @@ public void testStatsSpanSortOnMeasureMultiTermsWithScript() throws IOException resetQueryBucketSize(); } } + + @Test + public void testStatsSortOnMeasureComplex() throws IOException { + try { + setQueryBucketSize(5); + JSONObject response = + executeQuery( + String.format( + "source=%s | stats bucket_nullable=false sum(balance), count() as c, dc(employer)" + + " as d by state | sort - c | head 5", + TEST_INDEX_ACCOUNT)); + verifySchema( + response, + schema("sum(balance)", null, "bigint"), + schema("c", null, "bigint"), + schema("d", null, "bigint"), + schema("state", null, "string")); + System.out.println(response); + verifyDataRows( + response, + rows(782199, 30, 30, "TX"), + rows(732523, 28, 28, "MD"), + rows(657957, 27, 27, "ID"), + rows(541575, 25, 25, "ME"), + rows(643489, 25, 25, "AL")); + response = + executeQuery( + String.format( + "source=%s | eval new_state = lower(state) | stats bucket_nullable=false" + + " sum(balance), count() as c, dc(employer) as d by gender, new_state | sort" + + " - d | head 5", + TEST_INDEX_ACCOUNT)); + verifySchema( + response, + schema("sum(balance)", null, "bigint"), + schema("c", null, "bigint"), + schema("d", null, "bigint"), + schema("gender", null, "string"), + schema("new_state", null, "string")); + System.out.println(response); + verifyDataRows( + response, + rows(484567, 18, 18, "M", "md"), + rows(376394, 17, 17, "M", "id"), + rows(505688, 17, 17, "F", "tx"), + rows(375409, 16, 16, "M", "me"), + rows(432776, 15, 15, "M", "ok")); + } finally { + resetQueryBucketSize(); + } + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml index 8138d506a93..4e6c0e1f59f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml @@ -8,7 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC-nulls-last]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"RegionID":{"terms":{"field":"RegionID","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml index f258552964f..acbf78ee28d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml @@ -9,7 +9,4 @@ calcite: LogicalFilter(condition=[AND(ILIKE($97, '%Google%', '\'), <>($63, ''), NOT(ILIKE($26, '%.google.%', '\')))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(ILIKE($3, '%Google%', '\'), <>($1, ''), NOT(ILIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), PROJECT->[c, dc(UserID), SearchPhrase]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","case_insensitive":true,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","case_insensitive":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"SearchPhrase":{"terms":{"field":"SearchPhrase","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(ILIKE($3, '%Google%', '\'), <>($1, ''), NOT(ILIKE($0, '%.google.%', 
'\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","case_insensitive":true,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","case_insensitive":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml index a0bab4f2aed..16e58d05b48 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml @@ -9,7 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"SearchEngineID":{"terms":{"field":"SearchEngineID","missing_bucket":false,"order":"asc"}}},{"ClientIP":{"terms":{"field":"ClientIP","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 
1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml index 60e5f7af061..2c78447a13d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml @@ -9,7 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"WatchID":{"terms":{"field":"WatchID","missing_bucket":false,"order":"asc"}}},{"ClientIP":{"terms":{"field":"ClientIP","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, 
sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml index 998d052f16e..964dcece0f6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml @@ -8,7 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($41), IS NOT NULL($76))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"WatchID":{"terms":{"field":"WatchID","missing_bucket":false,"order":"asc"}}},{"ClientIP":{"terms":{"field":"ClientIP","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml new file mode 100644 index 00000000000..e75e44a129d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$1], dir0=[DESC-nulls-last]) + LogicalProject(sum(balance)=[$1], c=[$2], dc(employer)=[$3], state=[$0]) + LogicalAggregate(group=[{0}], sum(balance)=[SUM($1)], c=[COUNT()], dc(employer)=[COUNT(DISTINCT $2)]) + LogicalProject(state=[$7], balance=[$3], employer=[$6]) + LogicalFilter(condition=[IS NOT NULL($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(balance), c, dc(employer), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml new file mode 100644 index 00000000000..1a3df86b66a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$2], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$2], dir0=[DESC-nulls-last]) + LogicalProject(sum(balance)=[$2], count()=[$3], d=[$4], gender=[$0], new_state=[$1]) + LogicalAggregate(group=[{0, 1}], sum(balance)=[SUM($2)], count()=[COUNT()], d=[COUNT(DISTINCT $3)]) + LogicalProject(gender=[$4], new_state=[$17], balance=[$3], employer=[$6]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($17))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},sum(balance)=SUM($0),count()=COUNT(),d=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydACjewogICJvcCI6IHsKICAgICJuYW1lIjogIkxPV0VSIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABXN0YXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml new file mode 100644 index 00000000000..4ecd0e026bb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[ASC-nulls-first]) + LogicalProject(c=[$2], s=[$3], span(age,5)=[$1], state=[$0]) + LogicalAggregate(group=[{0, 2}], c=[COUNT()], s=[SUM($1)]) + LogicalProject(state=[$7], balance=[$3], span(age,5)=[SPAN($8, 5, null:NULL)]) + LogicalFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, state, age], FILTER->AND(IS NOT NULL($2), IS NOT 
NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),s=SUM($1)), PROJECT->[c, s, span(age,5), state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"age","boost":1.0}},{"exists":{"field":"state","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","state","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}},{"span(age,5)":{"histogram":{"field":"age","missing_bucket":false,"order":"asc","interval":5.0}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml index 6a5bc8ea0f5..8ac888eab4a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml @@ -1,7 +1,7 @@ calcite: logical: | - LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(sort0=[$0], dir0=[ASC-nulls-first]) + LogicalSystemLimit(sort0=[$0], sort1=[$2], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], sort1=[$2], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) LogicalProject(c=[$1], s=[$2], state=[$0]) LogicalAggregate(group=[{0}], c=[COUNT()], s=[SUM($1)]) LogicalProject(state=[$7], balance=[$3]) @@ -9,5 +9,5 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) + EnumerableSort(sort0=[$0], sort1=[$2], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),s=SUM($0)), PROJECT->[c, s, state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java index 462c4be7243..77107b0c8e0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java @@ -23,6 +23,7 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.immutables.value.Value; import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import 
org.opensearch.sql.expression.function.udf.binning.WidthBucketFunction; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; @@ -91,7 +92,7 @@ protected void apply( /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { Config DEFAULT = ImmutableAggregateIndexScanRule.Config.builder() .build() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index 9adc14ee70f..bdace1bd0a3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -22,6 +22,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -106,7 +107,7 @@ private static boolean validFilter(LogicalFilter filter) { * LogicalFilter(condition=[IS NOT NULL($0)])
*/ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { Config DEFAULT = ImmutableDedupPushdownRule.Config.builder() .build() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java index b1bd711601d..204ecacbd39 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java @@ -19,6 +19,7 @@ import org.apache.calcite.rel.core.Project; import org.apache.commons.lang3.tuple.Pair; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; @@ -94,7 +95,7 @@ public void onMatch(RelOptRuleCall call) { } @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { /** * Only match ENUMERABLE convention RelNode combination like below to narrow the optimization diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java index 8aa2f77b6ac..b0c4f55aa3d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java @@ -12,6 +12,7 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.logical.LogicalFilter; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -48,7 +49,7 @@ protected void apply(RelOptRuleCall call, Filter filter, CalciteLogicalIndexScan /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { /** Config that matches Filter on CalciteLogicalIndexScan. */ Config DEFAULT = ImmutableFilterIndexScanRule.Config.builder() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java index 1c24c7664fc..5d5412ce86d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java @@ -13,6 +13,7 @@ import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -82,7 +83,7 @@ private static Integer extractOffsetValue(RexNode offset) { /** Rule configuration. 
*/ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { LimitIndexScanRule.Config DEFAULT = ImmutableLimitIndexScanRule.Config.builder() .build() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java index d4c7986145e..629869be547 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java @@ -21,6 +21,7 @@ import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.Mappings; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -102,7 +103,7 @@ public boolean isIdentity(Integer size) { /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { /** Config that matches Project on ProjectIndexScanRule. */ Config DEFAULT = ImmutableProjectIndexScanRule.Config.builder() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java index a04d8cea0b2..2d065ff0c3f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java @@ -17,6 +17,7 @@ import org.apache.calcite.rex.RexWindow; import org.apache.calcite.sql.SqlKind; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -73,7 +74,7 @@ public void onMatch(RelOptRuleCall call) { } @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { RareTopPushdownRule.Config DEFAULT = ImmutableRareTopPushdownRule.Config.builder() .build() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java index 31a67f49757..6ec968ebc6d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java @@ -18,6 +18,7 @@ import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlOperator; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; /** @@ -100,7 +101,7 @@ boolean hasRelevanceFunction() { /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { /** Config that matches Filter on CalciteLogicalIndexScan. 
*/ Config DEFAULT = ImmutableRelevanceFunctionPushdownRule.Config.builder() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java index 1b40063e6b1..62587a2d430 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java @@ -11,6 +11,7 @@ import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.logical.LogicalSort; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -34,7 +35,7 @@ public void onMatch(RelOptRuleCall call) { /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { // TODO support multiple measures, only support single measure sort Predicate hasOneFieldCollation = sort -> sort.getCollation().getFieldCollations().size() == 1; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java index ff30324d09f..86a039cc145 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java @@ -10,6 +10,7 @@ import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.core.Sort; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; @@ -37,7 +38,7 @@ public void onMatch(RelOptRuleCall call) { /** Rule configuration. */ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { SortIndexScanRule.Config DEFAULT = ImmutableSortIndexScanRule.Config.builder() .build() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java index 48684020909..9f27bfd3954 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java @@ -26,6 +26,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.commons.lang3.tuple.Pair; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; @@ -121,7 +122,7 @@ public void onMatch(RelOptRuleCall call) { * and physical conventions, aka LogicalSort with fetch vs EnumerableLimit. 
*/ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { SortProjectExprTransposeRule.Config DEFAULT = ImmutableSortProjectExprTransposeRule.Config.builder() .build() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 6a25df6ff3a..3416515b08a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -198,18 +198,18 @@ public void pushDownFilterForCalcite(QueryBuilder query) { /** * Push down aggregation to DSL request. * - * @param aggregationBuilder pair of aggregation query and aggregation parser. + * @param builderAndParser pair of the aggregation builders and their response parser. */ public void pushDownAggregation( - Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> aggregationBuilder) { - aggregationBuilder.getLeft().forEach(sourceBuilder::aggregation); + Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> builderAndParser) { + builderAndParser.getLeft().forEach(sourceBuilder::aggregation); sourceBuilder.size(0); - exprValueFactory.setParser(aggregationBuilder.getRight()); + exprValueFactory.setParser(builderAndParser.getRight()); // no need to sort docs for aggregation if (sourceBuilder.sorts() != null) { sourceBuilder.sorts().clear(); } - if (aggregationBuilder.getRight() instanceof CountAsTotalHitsParser) { + if (builderAndParser.getRight() instanceof CountAsTotalHitsParser) { sourceBuilder.trackTotalHits(true); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index aa09a86d864..3d01c81aeae 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -294,7 +294,7 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { try { if (!pushDownContext.isAggregatePushed()) return null; List<AggregationBuilder> aggregationBuilders = - pushDownContext.getAggPushDownAction().getAggregationBuilder().getLeft(); + pushDownContext.getAggPushDownAction().getBuilderAndParser().getLeft(); if (aggregationBuilders.size() != 1) { return null; } @@ -302,7 +302,7 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { return null; } List<String> collationNames = getCollationNames(sort.getCollation().getFieldCollations()); - if (!isAllCollationNamesEqualAggregators(collationNames)) { + if (!isAnyCollationNameInAggregators(collationNames)) { return null; } CalciteLogicalIndexScan newScan = copyWithNewTraitSet(sort.getTraitSet()); @@ -366,7 +366,7 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, Project project) { AggregateAnalyzer.AggregateBuilderHelper helper = new AggregateAnalyzer.AggregateBuilderHelper( getRowType(), fieldTypes, getCluster(), bucketNullable, bucketSize); - final Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> aggregationBuilder = + final Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> builderAndParser = AggregateAnalyzer.analyze(aggregate, project, outputFields, helper); Map<String, OpenSearchDataType> extendedTypeMapping = aggregate.getRowType().getFieldList().stream() @@ -379,7 +379,7 @@ public AbstractRelNode 
pushDownAggregate(Aggregate aggregate, Project project) { field.getType())))); AggPushDownAction action = new AggPushDownAction( - aggregationBuilder, + builderAndParser, extendedTypeMapping, outputFields.subList(0, aggregate.getGroupSet().cardinality())); newScan.pushDownContext.add(PushDownType.AGGREGATION, aggregate, action); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 009dc0f57b7..d26027cf471 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -15,7 +15,6 @@ import lombok.Getter; import org.apache.calcite.rel.RelFieldCollation; import org.apache.commons.lang3.tuple.Pair; -import org.opensearch.search.aggregations.AbstractAggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.AggregatorFactories; @@ -44,21 +43,22 @@ @Getter @EqualsAndHashCode public class AggPushDownAction implements OSRequestBuilderAction { + private static final int MAX_BUCKET_SIZE = 65535; - private Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> aggregationBuilder; + private Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> builderAndParser; private final Map<String, OpenSearchDataType> extendedTypeMapping; private final long scriptCount; // Record the output field names of all buckets as the sequence of buckets private List<String> bucketNames; public AggPushDownAction( - Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> aggregationBuilder, + Pair<List<AggregationBuilder>, OpenSearchAggregationResponseParser> builderAndParser, Map<String, OpenSearchDataType> extendedTypeMapping, List<String> bucketNames) { - this.aggregationBuilder = aggregationBuilder; + this.builderAndParser = builderAndParser; this.extendedTypeMapping = extendedTypeMapping; this.scriptCount = - aggregationBuilder.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(); + builderAndParser.getLeft().stream().mapToInt(AggPushDownAction::getScriptCount).sum(); this.bucketNames = bucketNames; } @@ -82,7 +82,7 @@ private static int getScriptCount(AggregationBuilder aggBuilder) { @Override public void apply(OpenSearchRequestBuilder requestBuilder) { - requestBuilder.pushDownAggregation(aggregationBuilder); + requestBuilder.pushDownAggregation(builderAndParser); requestBuilder.pushTypeMapping(extendedTypeMapping); } @@ -109,132 +109,100 @@ private String multiTermsBucketNameAsString(CompositeAggregationBuilder composit /** Re-pushdown a sort aggregation measure to replace the pushed composite aggregation */ public void rePushDownSortAggMeasure( List<RelFieldCollation> collations, List<String> fieldNames) { - if (aggregationBuilder.getLeft().isEmpty()) return; - AggregationBuilder builder = aggregationBuilder.getLeft().getFirst(); - if (builder instanceof CompositeAggregationBuilder composite) { + if (builderAndParser.getLeft().isEmpty()) return; + if (builderAndParser.getLeft().getFirst() instanceof CompositeAggregationBuilder composite) { String path = getAggregationPath(collations, fieldNames, composite); BucketOrder bucketOrder = collations.get(0).getDirection() == RelFieldCollation.Direction.ASCENDING ? 
BucketOrder.aggregation(path, true) : BucketOrder.aggregation(path, false); - + AggregationBuilder aggregationBuilder = null; if (composite.sources().size() == 1) { if (composite.sources().get(0) instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) { - TermsAggregationBuilder termsBuilder = - buildTermsAggregationBuilder(terms, bucketOrder, composite.size()); - attachSubAggregations(composite.getSubAggregations(), path, termsBuilder); - aggregationBuilder = - Pair.of( - Collections.singletonList(termsBuilder), - convertTo(aggregationBuilder.getRight())); - return; + aggregationBuilder = buildTermsAggregationBuilder(terms, bucketOrder, composite.size()); + attachSubAggregations(composite.getSubAggregations(), path, aggregationBuilder); } else if (composite.sources().get(0) instanceof DateHistogramValuesSourceBuilder dateHisto) { - DateHistogramAggregationBuilder dateHistoBuilder = - buildDateHistogramAggregationBuilder(dateHisto, bucketOrder); - attachSubAggregations(composite.getSubAggregations(), path, dateHistoBuilder); - aggregationBuilder = - Pair.of( - Collections.singletonList(dateHistoBuilder), - convertTo(aggregationBuilder.getRight())); - return; + aggregationBuilder = buildDateHistogramAggregationBuilder(dateHisto, bucketOrder); + attachSubAggregations(composite.getSubAggregations(), path, aggregationBuilder); } else if (composite.sources().get(0) instanceof HistogramValuesSourceBuilder histo && !histo.missingBucket()) { - HistogramAggregationBuilder histoBuilder = - buildHistogramAggregationBuilder(histo, bucketOrder); - attachSubAggregations(composite.getSubAggregations(), path, histoBuilder); - aggregationBuilder = - Pair.of( - Collections.singletonList(histoBuilder), - convertTo(aggregationBuilder.getRight())); - return; + aggregationBuilder = buildHistogramAggregationBuilder(histo, bucketOrder); + attachSubAggregations(composite.getSubAggregations(), path, aggregationBuilder); + } else { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort aggregate measure"); } } else { if (composite.sources().stream() .allMatch( src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket())) { // multi-term agg - MultiTermsAggregationBuilder multiTermsBuilder = - buildMultiTermsAggregationBuilder(composite, bucketOrder); - attachSubAggregations(composite.getSubAggregations(), path, multiTermsBuilder); - aggregationBuilder = - Pair.of( - Collections.singletonList(multiTermsBuilder), - convertTo(aggregationBuilder.getRight())); - return; + aggregationBuilder = buildMultiTermsAggregationBuilder(composite, bucketOrder); + attachSubAggregations(composite.getSubAggregations(), path, aggregationBuilder); + } else { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown sort aggregate measure"); } } - throw new OpenSearchRequestBuilder.PushDownUnSupportedException( - "Cannot pushdown sort aggregate measure"); + builderAndParser = + Pair.of( + Collections.singletonList(aggregationBuilder), + convertTo(builderAndParser.getRight())); } } /** Re-pushdown a nested aggregation for rare/top to replace the pushed composite aggregation */ public void rePushDownRareTop(RareTopDigest digest) { - if (aggregationBuilder.getLeft().isEmpty()) return; - AggregationBuilder builder = aggregationBuilder.getLeft().getFirst(); - if (builder instanceof CompositeAggregationBuilder composite) { + if (builderAndParser.getLeft().isEmpty()) return; + if (builderAndParser.getLeft().getFirst() instanceof 
CompositeAggregationBuilder composite) { BucketOrder bucketOrder = digest.direction() == RelFieldCollation.Direction.ASCENDING ? BucketOrder.count(true) : BucketOrder.count(false); + AggregationBuilder aggregationBuilder = null; if (composite.sources().size() == 1) { if (composite.sources().get(0) instanceof TermsValuesSourceBuilder terms && !terms.missingBucket()) { - TermsAggregationBuilder termsBuilder = - buildTermsAggregationBuilder(terms, bucketOrder, digest.number()); - aggregationBuilder = - Pair.of( - Collections.singletonList(termsBuilder), - convertTo(aggregationBuilder.getRight())); - return; + aggregationBuilder = buildTermsAggregationBuilder(terms, bucketOrder, digest.number()); } else if (composite.sources().get(0) instanceof DateHistogramValuesSourceBuilder dateHisto) { // for top/rare, only a field can be used in the by-clause, so this branch is currently never reached - DateHistogramAggregationBuilder dateHistoBuilder = - buildDateHistogramAggregationBuilder(dateHisto, bucketOrder); - aggregationBuilder = - Pair.of( - Collections.singletonList(dateHistoBuilder), - convertTo(aggregationBuilder.getRight())); - return; + aggregationBuilder = buildDateHistogramAggregationBuilder(dateHisto, bucketOrder); } else if (composite.sources().get(0) instanceof HistogramValuesSourceBuilder histo && !histo.missingBucket()) { // for top/rare, only a field can be used in the by-clause, so this branch is currently never reached - HistogramAggregationBuilder histoBuilder = - buildHistogramAggregationBuilder(histo, bucketOrder); - aggregationBuilder = - Pair.of( - Collections.singletonList(histoBuilder), - convertTo(aggregationBuilder.getRight())); - return; + aggregationBuilder = buildHistogramAggregationBuilder(histo, bucketOrder); } else { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown " + digest); } } else { if (composite.sources().stream() .allMatch( src -> src instanceof TermsValuesSourceBuilder terms && !terms.missingBucket())) { - // nested term agg - TermsAggregationBuilder termsBuilder = null; for (int i = 0; i < composite.sources().size(); i++) { TermsValuesSourceBuilder terms = (TermsValuesSourceBuilder) composite.sources().get(i); if (i == 0) { // first - termsBuilder = buildTermsAggregationBuilder(terms, null, 65535); + aggregationBuilder = buildTermsAggregationBuilder(terms, null, MAX_BUCKET_SIZE); } else if (i == composite.sources().size() - 1) { // last - termsBuilder.subAggregation( + aggregationBuilder.subAggregation( buildTermsAggregationBuilder(terms, bucketOrder, digest.number())); } else { - termsBuilder.subAggregation(buildTermsAggregationBuilder(terms, null, 65535)); + aggregationBuilder.subAggregation( + buildTermsAggregationBuilder(terms, null, MAX_BUCKET_SIZE)); } } - aggregationBuilder = - Pair.of( - Collections.singletonList(termsBuilder), - convertTo(aggregationBuilder.getRight())); - return; + } else { + throw new OpenSearchRequestBuilder.PushDownUnSupportedException( + "Cannot pushdown " + digest); } } - throw new OpenSearchRequestBuilder.PushDownUnSupportedException("Cannot pushdown " + digest); + builderAndParser = + Pair.of( + Collections.singletonList(aggregationBuilder), + convertTo(builderAndParser.getRight())); } } @@ -328,29 +296,29 @@ private String getAggregationPath( List<RelFieldCollation> collations, List<String> fieldNames, CompositeAggregationBuilder composite) { - String path; AggregationBuilder metric = composite.getSubAggregations().stream().findFirst().orElse(null); - if (metric == null) { - // count agg optimized, get the path name from field names - 
path = fieldNames.get(collations.get(0).getFieldIndex()); - } else if (metric instanceof ValuesSourceAggregationBuilder.LeafOnly) { - path = metric.getName(); - } else { - // we do not support pushdown sort aggregate measure for nested aggregation + if (metric != null && !(metric instanceof ValuesSourceAggregationBuilder.LeafOnly)) { + // do not push down sort aggregate measure for nested aggregations, e.g. composite then range throw new OpenSearchRequestBuilder.PushDownUnSupportedException( "Cannot pushdown sort aggregate measure, composite.getSubAggregations() is not a" + " LeafOnly"); } - return path; + return fieldNames.get(collations.get(0).getFieldIndex()); } - private <T extends AbstractAggregationBuilder<T>> T attachSubAggregations( - Collection<AggregationBuilder> subAggregations, String path, T aggregationBuilder) { + private AggregationBuilder attachSubAggregations( + Collection<AggregationBuilder> subAggregations, + String path, + AggregationBuilder aggregationBuilder) { AggregatorFactories.Builder metricBuilder = new AggregatorFactories.Builder(); if (subAggregations.isEmpty()) { metricBuilder.addAggregator(AggregationBuilders.count(path).field("_index")); } else { - metricBuilder.addAggregator(subAggregations.stream().toList().get(0)); + subAggregations.forEach(metricBuilder::addAggregator); + // the count aggregator may be eliminated by the doc_count optimization, so add it back + if (subAggregations.stream().noneMatch(sub -> sub.getName().equals(path))) { + metricBuilder.addAggregator(AggregationBuilders.count(path).field("_index")); + } } aggregationBuilder.subAggregations(metricBuilder); return aggregationBuilder; @@ -359,8 +327,8 @@ private <T extends AbstractAggregationBuilder<T>> T attachSubAggregations( public void pushDownSortIntoAggBucket( List<RelFieldCollation> collations, List<String> fieldNames) { // aggregationBuilder.getLeft() could be empty when count agg optimization works - if (aggregationBuilder.getLeft().isEmpty()) return; - AggregationBuilder builder = aggregationBuilder.getLeft().getFirst(); + if (builderAndParser.getLeft().isEmpty()) return; + AggregationBuilder builder = builderAndParser.getLeft().getFirst(); List selected = new ArrayList<>(collations.size()); if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { // It will always use a single CompositeAggregationBuilder for the aggregation with GroupBy @@ -408,13 +376,13 @@ public void pushDownSortIntoAggBucket( }); AggregatorFactories.Builder newAggBuilder = new AggregatorFactories.Builder(); compositeAggBuilder.getSubAggregations().forEach(newAggBuilder::addAggregator); - aggregationBuilder = + builderAndParser = Pair.of( Collections.singletonList( AggregationBuilders.composite("composite_buckets", newBuckets) .subAggregations(newAggBuilder) .size(compositeAggBuilder.size())), - aggregationBuilder.getRight()); + builderAndParser.getRight()); bucketNames = newBucketNames; } if (builder instanceof TermsAggregationBuilder termsAggBuilder) { @@ -429,8 +397,8 @@ public void pushDownSortIntoAggBucket( */ public boolean pushDownLimitIntoBucketSize(Integer size) { // aggregationBuilder.getLeft() could be empty when count agg optimization works - if (aggregationBuilder.getLeft().isEmpty()) return false; - AggregationBuilder builder = aggregationBuilder.getLeft().getFirst(); + if (builderAndParser.getLeft().isEmpty()) return false; + AggregationBuilder builder = builderAndParser.getLeft().getFirst(); if (builder instanceof CompositeAggregationBuilder compositeAggBuilder) { if (size < compositeAggBuilder.size()) { compositeAggBuilder.size(size); diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 1b50a2a8751..9098d1ca17c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -94,6 +94,7 @@ ArrayDeque getOperationsForAgg() { @Override public boolean add(PushDownOperation operation) { + operation.action().transform(this, operation); if (operation.type() == PushDownType.AGGREGATION) { isAggregatePushed = true; this.aggPushDownAction = (AggPushDownAction) operation.action(); @@ -116,7 +117,6 @@ public boolean add(PushDownOperation operation) { if (operation.type() == PushDownType.RARE_TOP) { isRareTopPushed = true; } - operation.action().transform(this, operation); return true; } From 5211d70dfdb8d0641e3d38043c390396d35e699e Mon Sep 17 00:00:00 2001 From: Aaron Alvarez <900908alvarezaaron@gmail.com> Date: Mon, 10 Nov 2025 11:36:55 -0800 Subject: [PATCH 37/99] Adding IT suite for PPL-based dashboards in Neo for CloudWatch Lake (#4695) Co-authored-by: Aaron Alvarez Co-authored-by: Jialiang Liang --- .../dashboard/CloudTrailPplDashboardIT.java | 242 ++++++ .../sql/ppl/dashboard/NfwPplDashboardIT.java | 753 ++++++++++++++++++ .../opensearch/sql/ppl/dashboard/README.md | 64 ++ .../dashboard/VpcFlowLogsPplDashboardIT.java | 268 +++++++ .../sql/ppl/dashboard/WafPplDashboardIT.java | 200 +++++ .../cloudtrail_logs_index_mapping.json | 140 ++++ .../mappings/nfw_logs_index_mapping.json | 117 +++ .../mappings/vpc_logs_index_mapping.json | 72 ++ .../mappings/waf_logs_index_mapping.json | 104 +++ .../templates/dashboard/cloudtrail.rst | 294 +++++++ .../ppl/dashboard/templates/dashboard/nfw.rst | 600 ++++++++++++++ .../ppl/dashboard/templates/dashboard/vpc.rst | 289 +++++++ .../ppl/dashboard/templates/dashboard/waf.rst | 224 ++++++ .../dashboard/testdata/cloudtrail_logs.json | 200 +++++ .../sql/ppl/dashboard/testdata/nfw_logs.json | 200 +++++ .../sql/ppl/dashboard/testdata/vpc_logs.json | 200 +++++ .../sql/ppl/dashboard/testdata/waf_logs.json | 200 +++++ 17 files changed, 4167 insertions(+) create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/CloudTrailPplDashboardIT.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/README.md create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/cloudtrail_logs_index_mapping.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/nfw_logs_index_mapping.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/vpc_logs_index_mapping.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/waf_logs_index_mapping.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/cloudtrail.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/nfw.rst create mode 100644 
integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/vpc.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/waf.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/cloudtrail_logs.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/nfw_logs.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/vpc_logs.json create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/waf_logs.json diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/CloudTrailPplDashboardIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/CloudTrailPplDashboardIT.java new file mode 100644 index 00000000000..0a1c19a4e5a --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/CloudTrailPplDashboardIT.java @@ -0,0 +1,242 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.dashboard; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.legacy.TestUtils; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration tests for CloudTrail PPL dashboard queries using exact original dashboard query + * format. These tests ensure that CloudTrail-related PPL queries work correctly with actual test + * data. + */ +public class CloudTrailPplDashboardIT extends PPLIntegTestCase { + + private static final String CLOUDTRAIL_LOGS_INDEX = "cloudtrail_logs"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadCloudTrailLogsIndex(); + } + + private void loadCloudTrailLogsIndex() throws IOException { + if (!TestUtils.isIndexExist(client(), CLOUDTRAIL_LOGS_INDEX)) { + String mapping = TestUtils.getMappingFile("mappings/cloudtrail_logs_index_mapping.json"); + TestUtils.createIndexByRestClient(client(), CLOUDTRAIL_LOGS_INDEX, mapping); + TestUtils.loadDataByRestClient( + client(), + CLOUDTRAIL_LOGS_INDEX, + "src/test/java/org/opensearch/sql/ppl/dashboard/testdata/cloudtrail_logs.json"); + } + } + + @Test + public void testTotalEventsCount() throws IOException { + String query = + String.format("source=%s | stats count() as `Event Count`", CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Event Count", null, "bigint")); + verifyDataRows(response, rows(100)); + } + + @Test + public void testEventsOverTime() throws IOException { + String query = + String.format("source=%s | stats count() by span(start_time, 30d)", CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("count()", null, "bigint"), + schema("span(start_time,30d)", null, "timestamp")); + } + + @Test + public void testEventsByAccountIds() throws IOException { + String query = + String.format( + "source=%s | where isnotnull(userIdentity.accountId) | stats count() as Count by" + + " userIdentity.accountId | sort - Count | head 10", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", null, "bigint"), + 
schema("userIdentity.accountId", null, "string")); + } + + @Test + public void testEventsByCategory() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by eventCategory | sort - Count | head 5", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, schema("Count", null, "bigint"), schema("eventCategory", null, "string")); + verifyDataRows(response, rows(100, "Management")); + } + + @Test + public void testEventsByRegion() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `awsRegion` | sort - Count | head 10", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("awsRegion", null, "string")); + verifyDataRows( + response, + rows(12, "us-west-1"), + rows(12, "ca-central-1"), + rows(9, "us-west-2"), + rows(8, "ap-southeast-1"), + rows(8, "ap-northeast-1"), + rows(7, "us-east-2"), + rows(7, "sa-east-1"), + rows(7, "eu-north-1"), + rows(7, "ap-south-1"), + rows(6, "ap-southeast-2")); + } + + @Test + public void testTop10EventAPIs() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `eventName` | sort - Count | head 10", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("eventName", null, "string")); + } + + @Test + public void testTop10Services() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `eventSource` | sort - Count | head 10", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("eventSource", null, "string")); + verifyDataRows( + response, + rows(15, "ec2.amazonaws.com"), + rows(14, "s3.amazonaws.com"), + rows(13, "rds.amazonaws.com"), + rows(10, "dynamodb.amazonaws.com"), + rows(9, "cloudwatch.amazonaws.com"), + rows(8, "sts.amazonaws.com"), + rows(8, "lambda.amazonaws.com"), + rows(8, "iam.amazonaws.com"), + rows(8, "cloudformation.amazonaws.com"), + rows(7, "logs.amazonaws.com")); + } + + @Test + public void testTop10SourceIPs() throws IOException { + String query = + String.format( + "source=%s | WHERE NOT (sourceIPAddress LIKE '%%amazon%%.com%%') | STATS count() as" + + " Count by sourceIPAddress| SORT - Count| HEAD 10", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, schema("Count", null, "bigint"), schema("sourceIPAddress", null, "string")); + } + + @Test + public void testTop10UsersGeneratingEvents() throws IOException { + String query = + String.format( + "source=%s | where ISNOTNULL(`userIdentity.accountId`)| STATS count() as Count by" + + " `userIdentity.sessionContext.sessionIssuer.userName`, `userIdentity.accountId`," + + " `userIdentity.sessionContext.sessionIssuer.type` | rename" + + " `userIdentity.sessionContext.sessionIssuer.userName` as `User Name`," + + " `userIdentity.accountId` as `Account Id`," + + " `userIdentity.sessionContext.sessionIssuer.type` as `Type` | SORT - Count |" + + " HEAD 1000", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", null, "bigint"), + schema("User Name", null, "string"), + schema("Account Id", null, "string"), + schema("Type", null, "string")); + } + + @Test + public void testEC2ChangeEventCount() throws IOException { + String query = + 
String.format( + "source=%s | where eventSource like \\\"ec2%%\\\" and (eventName = \\\"RunInstances\\\"" + + " or eventName = \\\"TerminateInstances\\\" or eventName = \\\"StopInstances\\\")" + + " and not (eventName like \\\"Get%%\\\" or eventName like \\\"Describe%%\\\" or" + + " eventName like \\\"List%%\\\" or eventName like \\\"Head%%\\\") | stats count()" + + " as Count by eventName | sort - Count | head 5", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("eventName", null, "string")); + verifyDataRows(response, rows(1, "TerminateInstances"), rows(1, "RunInstances")); + } + + @Test + public void testEC2UsersBySessionIssuer() throws IOException { + String query = + String.format( + "source=%s | where isnotnull(`userIdentity.sessionContext.sessionIssuer.userName`) and" + + " `eventSource` like 'ec2%%' and not (`eventName` like 'Get%%' or `eventName`" + + " like 'Describe%%' or `eventName` like 'List%%' or `eventName` like 'Head%%') |" + + " stats count() as Count by `userIdentity.sessionContext.sessionIssuer.userName`" + + " | sort - Count | head 10", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", null, "bigint"), + schema("userIdentity.sessionContext.sessionIssuer.userName", null, "string")); + verifyDataRows( + response, rows(1, "Analyst"), rows(1, "DataEngineer"), rows(1, "ec2-service"), rows(1, "")); + } + + @Test + public void testEC2EventsByName() throws IOException { + String query = + String.format( + "source=%s | where `eventSource` like \\\"ec2%%\\\" and not (`eventName` like" + + " \\\"Get%%\\\" or `eventName` like \\\"Describe%%\\\" or `eventName` like" + + " \\\"List%%\\\" or `eventName` like \\\"Head%%\\\") | stats count() as Count by" + + " `eventName` | rename `eventName` as `Event Name` | sort - Count | head 10", + CLOUDTRAIL_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("Event Name", null, "string")); + verifyDataRows( + response, + rows(2, "CreateSecurityGroup"), + rows(1, "RunInstances"), + rows(1, "TerminateInstances")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java new file mode 100644 index 00000000000..36c6a41a9fd --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/NfwPplDashboardIT.java @@ -0,0 +1,753 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.dashboard; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.sql.legacy.TestUtils; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class NfwPplDashboardIT extends PPLIntegTestCase { + + private static final String NFW_LOGS_INDEX = "nfw_logs"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadNfwLogsIndex(); + } + + private void loadNfwLogsIndex() throws IOException { + if (TestUtils.isIndexExist(client(), NFW_LOGS_INDEX)) { + Request 
deleteRequest = new Request("DELETE", "/" + NFW_LOGS_INDEX); + TestUtils.performRequest(client(), deleteRequest); + } + String mapping = TestUtils.getMappingFile("mappings/nfw_logs_index_mapping.json"); + TestUtils.createIndexByRestClient(client(), NFW_LOGS_INDEX, mapping); + TestUtils.loadDataByRestClient( + client(), + NFW_LOGS_INDEX, + "src/test/java/org/opensearch/sql/ppl/dashboard/testdata/nfw_logs.json"); + } + + @Test + public void testTopApplicationProtocols() throws IOException { + String query = + String.format( + "source=%s | where isnotnull(`event.app_proto`) | STATS count() as Count by" + + " `event.app_proto` | SORT - Count| HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", "bigint"), schema("event.app_proto", "string")); + verifyDataRows( + response, rows(89L, "http"), rows(5L, "unknown"), rows(2L, "tls"), rows(2L, "dns")); + } + + @Test + public void testTopSourceIPByPackets() throws IOException { + String query = + String.format( + "source=%s | stats sum(`event.netflow.pkts`) as packet_count by span(`event.timestamp`," + + " 2d) as timestamp_span, `event.src_ip` | rename `event.src_ip` as `Source IP` |" + + " sort - packet_count | head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("packet_count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("Source IP", "string")); + } + + @Test + public void testTopSourceIPByBytes() throws IOException { + String query = + String.format( + "source=%s | stats sum(`event.netflow.bytes`) as sum_bytes by span(`event.timestamp`," + + " 2d) as timestamp_span, `event.src_ip` | rename `event.src_ip` as `Source IP` |" + + " sort - sum_bytes | head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("sum_bytes", "bigint"), + schema("timestamp_span", "timestamp"), + schema("Source IP", "string")); + } + + @Test + public void testTopDestinationIPByPackets() throws IOException { + String query = + String.format( + "source=%s | stats sum(`event.netflow.pkts`) as packet_count by span(`event.timestamp`," + + " 2d) as timestamp_span, `event.dest_ip` | rename `event.dest_ip` as" + + " `Destination IP` | sort - packet_count | head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("packet_count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("Destination IP", "string")); + } + + @Test + public void testTopDestinationIPByBytes() throws IOException { + String query = + String.format( + "source=%s | stats sum(`event.netflow.bytes`) as bytes by span(`event.timestamp`, 2d)" + + " as timestamp_span, `event.dest_ip` | rename `event.dest_ip` as `Destination IP`" + + " | sort - bytes| head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("bytes", "bigint"), + schema("timestamp_span", "timestamp"), + schema("Destination IP", "string")); + } + + @Test + public void testTopSourceIPsByPacketsAndBytes() throws IOException { + String query = + String.format( + "source=%s | STATS SUM(`event.netflow.pkts`) as Packets, SUM(`event.netflow.bytes`) as" + + " Bytes by `event.src_ip` | RENAME `event.src_ip` as `Source IP` | SORT - Bytes," + + " Packets | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Packets", "bigint"), + schema("Bytes", "bigint"), + schema("Source IP", "string")); + } + + @Test + 
public void testTopDestinationIPsByPacketsAndBytes() throws IOException { + String query = + String.format( + "source=%s | STATS SUM(`event.netflow.pkts`) as Packets, SUM(`event.netflow.bytes`) as" + + " Bytes by `event.dest_ip`| RENAME `event.dest_ip` as `Destination IP` | SORT -" + + " Bytes, Packets| HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Packets", "bigint"), + schema("Bytes", "bigint"), + schema("Destination IP", "string")); + } + + @Test + public void testTopSourceAndDestinationPackets() throws IOException { + String query = + String.format( + "source=%s | stats sum(`event.netflow.pkts`) as packet_count by span(`event.timestamp`," + + " 2d) as timestamp_span, `event.src_ip`, `event.dest_ip` | eval `Src IP - Dst" + + " IP` = concat(`event.src_ip`, \\\"-\\\", `event.dest_ip`) | sort - packet_count" + + " | head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("packet_count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("Src IP - Dst IP", "string")); + } + + @Test + public void testTopSourceAndDestinationByBytes() throws IOException { + String query = + String.format( + "source=%s | stats sum(`event.netflow.bytes`) as bytes by span(`event.timestamp`, 2d)" + + " as timestamp_span, `event.src_ip`, `event.dest_ip` | eval `Src IP - Dst IP` =" + + " concat(`event.src_ip`, \\\"-\\\", `event.dest_ip`) | sort - bytes | head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("bytes", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("Src IP - Dst IP", "string")); + } + + @Test + public void testTopHTTPHostHeaders() throws IOException { + String query = + String.format( + "source=%s | where `event.alert.action` =" + + " \\\"allowed\\\"| stats count() as event_count by span(`event.timestamp`, 2d) as" + + " time_bucket, `event.http.hostname` | rename `event.http.hostname` as" + + " `Hostname`| sort - event_count", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("event_count", "bigint"), + schema("time_bucket", "timestamp"), + schema("Hostname", "string")); + verifyDataRows(response, rows(1L, "2025-03-27 00:00:00", null)); + } + + @Test + public void testTopBlockedHTTPHostHeaders() throws IOException { + String query = + String.format( + "source=%s | where `event.alert.action` = \\\"blocked\\\" and" + + " isnotnull(`event.http.hostname`) | stats count() as event_count by" + + " span(`event.timestamp`, 2d) as time_bucket, `event.http.hostname` | rename" + + " `event.http.hostname` as `Hostname` | sort - event_count |" + + " HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("event_count", "bigint"), + schema("time_bucket", "timestamp"), + schema("Hostname", "string")); + verifyDataRows(response, rows(1L, "2025-02-23 00:00:00", "checkip.amazonaws.com")); + } + + @Test + public void testTopAllowedTLSSNI() throws IOException { + String query = + String.format( + "source=%s | where `event.alert.action` = \\\"allowed\\\"| stats count() as event_count" + + " by span(`event.timestamp`, 2d) as time_bucket, `event.tls.sni`| rename" + + " `event.tls.sni` as `Hostname` | sort - event_count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = 
executeQuery(query); + verifySchema( + response, + schema("event_count", "bigint"), + schema("time_bucket", "timestamp"), + schema("Hostname", "string")); + verifyDataRows(response, rows(1L, "2025-03-27 00:00:00", null)); + } + + @Test + public void testTopBlockedTLSSNI() throws IOException { + String query = + String.format( + "source=%s | where `event.alert.action` = \\\"blocked\\\" and" + + " isnotnull(`event.tls.sni`)| stats count() as event_count by" + + " span(`event.timestamp`, 2d) as time_bucket, `event.tls.sni` | rename" + + " `event.tls.sni` as `Hostname` | sort - event_count| HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("event_count", "bigint"), + schema("time_bucket", "timestamp"), + schema("Hostname", "string")); + verifyDataRows( + response, + rows(1L, "2025-02-23 00:00:00", "checkip.amazonaws.com"), + rows(1L, "2025-03-27 00:00:00", "s3.us-east-1.amazonaws.com")); + } + + @Test + public void testTopHTTPURIPaths() throws IOException { + String query = + String.format( + "source=%s | where isnotnull(`event.http.url`)| stats count() as event_count by" + + " span(`event.timestamp`, 2d) as timestamp_span, `event.http.url`| rename" + + " `event.http.url` as `URL` | sort - event_count| head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("event_count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("URL", "string")); + verifyDataRows(response, rows(1L, "2025-02-23 00:00:00", "/")); + } + + @Test + public void testTopHTTPUserAgents() throws IOException { + String query = + String.format( + "source=%s | where isnotnull(`event.http.http_user_agent`) | stats count() as" + + " event_count by span(`event.timestamp`, 2d) as timestamp_span," + + " `event.http.http_user_agent` | rename `event.http.http_user_agent` as `User" + + " Agent` | sort - event_count| head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("event_count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("User Agent", "string")); + verifyDataRows(response, rows(1L, "2025-02-23 00:00:00", "curl/8.5.0")); + } + + @Test + public void testTopPrivateLinkEndpointCandidates() throws IOException { + String query = + String.format( + "source=%s | where (`event.tls.sni` like 's3%%') or (`event.http.hostname` like" + + " 's3%%') or (`event.tls.sni` like 'dynamodb%%') or (`event.http.hostname` like" + + " 'dynamodb%%') or (`event.tls.sni` like 'backup%%') or (`event.http.hostname`" + + " like 'backup%%')| STATS count() as Count by `event.src_ip`, `event.dest_ip`," + + " `event.app_proto`, `event.tls.sni`, `event.http.hostname` | rename" + + " `event.tls.sni` as SNI, `event.dest_ip` as Dest_IP , `event.src_ip` as" + + " Source_IP, `event.http.hostname` as Hostname, `event.app_proto` as App_Proto |" + + " SORT - Count", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("SNI", "string"), + schema("Dest_IP", "string"), + schema("Source_IP", "string"), + schema("Hostname", "string"), + schema("App_Proto", "string")); + verifyDataRows( + response, + rows(1L, "10.2.1.120", "52.216.211.88", "tls", "s3.us-east-1.amazonaws.com", null)); + } + + @Test + public void testTopProtocols() throws IOException { + String query = + String.format( + "source=%s | STATS count() as Count by `event.proto`| SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = 
executeQuery(query); + verifySchema(response, schema("Count", "bigint"), schema("event.proto", "string")); + verifyDataRows(response, rows(95L, "TCP"), rows(2L, "UDP"), rows(3L, "ICMP")); + } + + @Test + public void testTopSourcePorts() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span," + + " `event.src_port` | eval `Source Port` = CAST(`event.src_port` AS STRING) | sort" + + " - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_port", "bigint"), + schema("Source Port", "string")); + } + + @Test + public void testTopDestinationPorts() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span," + + " `event.dest_port` | eval `Destination Port` = CAST(`event.dest_port` AS STRING)" + + " | sort - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.dest_port", "bigint"), + schema("Destination Port", "string")); + } + + @Test + public void testTopTCPFlows() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = \\\"TCP\\\" | STATS count() as Count by" + + " SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`," + + " `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, \\\" -" + + " \\\", `event.dest_ip`, \\\": \\\", CAST(`event.dest_port` AS STRING)) | SORT -" + + " Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopTCPFlowsByPackets() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = \\\"TCP\\\" | STATS sum(`event.netflow.pkts`) as" + + " Packets by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`," + + " `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` =" + + " CONCAT(`event.src_ip`, \\\" - \\\", `event.dest_ip`, \\\": \\\"," + + " CAST(`event.dest_port` AS STRING)) | SORT - Packets | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Packets", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopTCPFlowsByBytes() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = \\\"TCP\\\" | STATS sum(event.netflow.bytes) as" + + " Bytes by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`," + + " `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` =" + + " CONCAT(`event.src_ip`, \\\" - \\\", `event.dest_ip`, \\\": \\\"," + + " CAST(`event.dest_port` AS STRING)) | SORT - Bytes | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Bytes", "bigint"), + schema("timestamp_span", "timestamp"), + 
schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopTCPFlags() throws IOException { + String query = + String.format( + "source=%s | STATS count() as Count by `event.tcp.tcp_flags` | SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", "bigint"), schema("event.tcp.tcp_flags", "string")); + verifyDataRows( + response, + rows(8L, null), + rows(4L, "13"), + rows(4L, "17"), + rows(3L, "0"), + rows(3L, "1"), + rows(3L, "15"), + rows(3L, "16"), + rows(3L, "18"), + rows(3L, "19"), + rows(3L, "2")); + } + + @Test + public void testTopUDPFlows() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = \\\"UDP\\\"| STATS count() as Count by" + + " SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`," + + " `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, \\\" -" + + " \\\", `event.dest_ip`, \\\": \\\", CAST(`event.dest_port` AS STRING)) | SORT -" + + " Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopUDPFlowsByPackets() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = \\\"UDP\\\" | STATS sum(`event.netflow.pkts`) as" + + " Packets by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`," + + " `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` =" + + " CONCAT(`event.src_ip`, \\\" - \\\", `event.dest_ip`, \\\": \\\"," + + " CAST(`event.dest_port` AS STRING)) | SORT - Packets | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Packets", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopUDPFlowsByBytes() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = \\\"UDP\\\" | STATS sum(`event.netflow.bytes`) as" + + " Bytes by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`," + + " `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` =" + + " CONCAT(`event.src_ip`, \\\" - \\\", `event.dest_ip`, \\\": \\\"," + + " CAST(`event.dest_port` AS STRING)) | SORT - Bytes | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Bytes", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopICMPFlows() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = \\\"ICMP\\\" | STATS count() as Count by" + + " SPAN(`event.timestamp`, 1d) as timestamp_span, `event.src_ip`, `event.dest_ip`," + + " `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, \\\" -" + + " \\\", `event.dest_ip`, \\\": \\\", CAST(`event.dest_port` AS STRING)) | SORT 
-" + + " Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopDropRejectRules() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.alert.action` = \\\"blocked\\\"| STATS count() as Count by" + + " `event.alert.signature_id`, `event.alert.action`, `event.alert.signature`," + + " `event.proto`| RENAME `event.alert.signature_id` as SID, `event.alert.action`" + + " as Action, `event.alert.signature` as Message, `event.proto` as Proto | SORT -" + + " Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("SID", "bigint"), + schema("Action", "string"), + schema("Message", "string"), + schema("Proto", "string")); + } + + @Test + public void testTopAllowedRules() throws IOException { + String query = + String.format( + "source=%s | where `event.alert.action` = \\\"allowed\\\" | stats count() as Count by" + + " `event.alert.signature_id`, `event.alert.action`, `event.alert.signature`," + + " `event.proto` | rename `event.alert.signature_id` as SID, `event.alert.action`" + + " as Action, `event.alert.signature` as Message, `event.proto` as Proto | sort -" + + " Count | head 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("SID", "bigint"), + schema("Action", "string"), + schema("Message", "string"), + schema("Proto", "string")); + } + + @Test + public void testTopBlockedSourceIPs() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.alert.action` = \\\"blocked\\\" | STATS COUNT() as Count by" + + " `event.src_ip` | SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", "bigint"), schema("event.src_ip", "string")); + verifyDataRows(response, rows(4L, "10.170.18.235"), rows(1L, "10.2.1.120")); + } + + @Test + public void testTopBlockedDestinationIPs() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.alert.action` = \\\"blocked\\\" | STATS COUNT() as Count by" + + " `event.dest_ip` | SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", "bigint"), schema("event.dest_ip", "string")); + verifyDataRows( + response, + rows(2L, "8.8.8.8"), + rows(1L, "54.146.42.172"), + rows(1L, "54.242.115.112"), + rows(1L, "52.216.211.88")); + } + + @Test + public void testTopBlockedDestinationPorts() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.alert.action` = \\\"blocked\\\" | STATS COUNT() as `Count` by" + + " `event.dest_port` | EVAL `Destination Port` = CAST(`event.dest_port` as STRING)" + + " | SORT - `Count` | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("event.dest_port", "bigint"), + schema("Destination Port", "string")); + } + + @Test + public void testTopBlockedRemoteAccessPorts() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.alert.action` = \\\"blocked\\\" | STATS count() as Count by" + + " 
SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`," + + " `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, \\\" -" + + " \\\", `event.dest_ip`, \\\": \\\", CAST(`event.dest_port` AS STRING)) | SORT -" + + " Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopBlockedTCPFlows() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.alert.action` = 'blocked' and `event.proto` = 'TCP' | STATS" + + " count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span," + + " `event.src_ip`, `event.dest_ip`, `event.dest_port`| EVAL `Src IP - Dst IP:Port`" + + " = CONCAT(`event.src_ip`, \\\" - \\\", `event.dest_ip`, \\\": \\\"," + + " CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopBlockedUDPFlows() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.alert.action` = 'blocked' and `event.proto` = 'UDP' | STATS" + + " count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span," + + " `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst" + + " IP:Port` = CONCAT(`event.src_ip`, \\\" - \\\", `event.dest_ip`, \\\": \\\"," + + " CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP - Dst IP:Port", "string")); + } + + @Test + public void testTopTCPFlowsSynWithoutSynAck() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = 'TCP' and `event.tcp.syn` = \\\"true\\\" and" + + " `event.tcp.ack` = \\\"true\\\" | STATS count() as Count by" + + " SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`," + + " `event.src_port`, `event.dest_ip`, `event.dest_port`| EVAL `Src IP:Port - Dst" + + " IP:Port` = CONCAT(`event.src_ip`, \\\": \\\", CAST(`event.src_port` AS STRING)," + + " \\\" - \\\", `event.dest_ip`, \\\": \\\", CAST(`event.dest_port` AS STRING)) |" + + " SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.src_port", "bigint"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP:Port - Dst IP:Port", "string")); + } + + @Test + public void testTopLongLivedTCPFlows() throws IOException { + String query = + String.format( + "source=%s | WHERE `event.proto` = 'TCP' and `event.netflow.age` > 350 | STATS count()" + + " as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`," + + " `event.src_port`, `event.dest_ip`, 
`event.dest_port` | EVAL `Src IP:Port - Dst" + + " IP:Port` = CONCAT(`event.src_ip`, \\\": \\\", CAST(`event.src_port` AS STRING)," + + " \\\" - \\\", `event.dest_ip`, \\\": \\\", CAST(`event.dest_port` AS STRING)) |" + + " SORT - Count | HEAD 10", + NFW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", "bigint"), + schema("timestamp_span", "timestamp"), + schema("event.src_ip", "string"), + schema("event.src_port", "bigint"), + schema("event.dest_ip", "string"), + schema("event.dest_port", "bigint"), + schema("Src IP:Port - Dst IP:Port", "string")); + verifyDataRows( + response, + rows( + 1L, + "2025-03-27 00:00:00", + "45.82.78.100", + 52610L, + "10.2.1.120", + 8085L, + "45.82.78.100: 52610 - 10.2.1.120: 8085"), + rows( + 1L, + "2025-03-27 00:00:00", + "20.65.193.116", + 45550L, + "10.2.1.120", + 1433L, + "20.65.193.116: 45550 - 10.2.1.120: 1433")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/README.md b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/README.md new file mode 100644 index 00000000000..fbb881ffdca --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/README.md @@ -0,0 +1,64 @@ + + +# Dashboard Integration Tests + +This directory contains documentation and integration tests for OpenSearch dashboard-related PPL queries. + +## Overview + +Dashboard integration tests ensure that PPL queries used in various OpenSearch dashboards continue to work correctly as the SQL plugin evolves. These tests provide regression protection and validate query compatibility. + +## Dashboard Test Documentation + +### CloudTrail Dashboard +- **[CloudTrail PPL Integration Tests](templates/dashboard/cloudtrail.rst)** - Tests for CloudTrail log dashboard queries + - Validates AWS API call analysis queries + - Tests user activity and security monitoring + +### Network Firewall (NFW) Dashboard +- **[NFW PPL Integration Tests](templates/dashboard/nfw.rst)** - Tests for Network Firewall log dashboard queries + - Validates network security analysis queries + - Tests firewall rule and traffic monitoring + +### VPC Dashboard +- **[VPC PPL Integration Tests](templates/dashboard/vpc.rst)** - Tests for VPC flow log dashboard queries + - Validates network traffic analysis queries + - Tests top talkers, destinations, bytes, and packets analysis + +### WAF Dashboard +- **[WAF PPL Integration Tests](templates/dashboard/waf.rst)** - Tests for WAF log dashboard queries + - Includes nested httpRequest object handling + - Validates web application firewall analysis queries + - Tests blocked requests and rule analysis + +## Adding New Dashboard Tests + +When creating tests for new dashboard types: + +1. Create a new test class in this directory +2. Add test data files in `testdata/` +3. Add index mappings in `mappings/` +4. Add test template files in `templates/dashboard/` +5. 
Document the tests in this README + +## Test Structure + +Each dashboard test should include: +- **Query Pattern Validation** - Ensure all dashboard queries parse correctly +- **Real Data Testing** - Test with realistic sample data +- **Schema Validation** - Verify field types and query results +- **Data Validation** - Confirm expected result counts and values + +## Running Dashboard Tests + +```bash +# Run all dashboard-related PPL tests +./gradlew :integ-test:test --tests "*Dashboard*" + +# Run specific dashboard tests +./gradlew :integ-test:test --tests "*VpcPplDashboardIT*" +./gradlew :integ-test:test --tests "*WafPplDashboardIT*" +``` \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java new file mode 100644 index 00000000000..d0b677dbec1 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/VpcFlowLogsPplDashboardIT.java @@ -0,0 +1,268 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.dashboard; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.legacy.TestUtils; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** Integration tests for VPC Flow Logs PPL dashboard queries. */ +public class VpcFlowLogsPplDashboardIT extends PPLIntegTestCase { + + private static final String VPC_FLOW_LOGS_INDEX = "vpc_flow_logs"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadVpcFlowLogsIndex(); + } + + private void loadVpcFlowLogsIndex() throws IOException { + if (!TestUtils.isIndexExist(client(), VPC_FLOW_LOGS_INDEX)) { + String mapping = TestUtils.getMappingFile("mappings/vpc_logs_index_mapping.json"); + TestUtils.createIndexByRestClient(client(), VPC_FLOW_LOGS_INDEX, mapping); + TestUtils.loadDataByRestClient( + client(), + VPC_FLOW_LOGS_INDEX, + "src/test/java/org/opensearch/sql/ppl/dashboard/testdata/vpc_logs.json"); + } + } + + @Test + public void testTotalRequests() throws IOException { + String query = String.format("source=%s | stats count()", VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("count()", null, "bigint")); + verifyDataRows(response, rows(100)); + } + + @Test + public void testTotalFlowsByActions() throws IOException { + String query = + String.format( + "source=%s | STATS count() as Count by action | SORT - Count | HEAD 5", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("action", null, "string")); + verifyDataRows(response, rows(92, "ACCEPT"), rows(8, "REJECT")); + } + + @Test + public void testFlowsOvertime() throws IOException { + String query = + String.format("source=%s | STATS count() by span(`start`, 30d)", VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("count()", null, "bigint"), + schema("span(`start`,30d)", null, "timestamp")); + verifyDataRows( + response, + rows(6, "2025-04-12 
00:00:00"), + rows(24, "2025-05-12 00:00:00"), + rows(17, "2025-06-11 00:00:00"), + rows(12, "2025-07-11 00:00:00"), + rows(17, "2025-08-10 00:00:00"), + rows(13, "2025-09-09 00:00:00"), + rows(11, "2025-10-09 00:00:00")); + } + + @Test + public void testBytesTransferredOverTime() throws IOException { + String query = + String.format("source=%s | STATS sum(bytes) by span(`start`, 30d)", VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("sum(bytes)", null, "bigint"), + schema("span(`start`,30d)", null, "timestamp")); + verifyDataRows( + response, + rows(385560, "2025-04-12 00:00:00"), + rows(1470623, "2025-05-12 00:00:00"), + rows(1326170, "2025-06-11 00:00:00"), + rows(946422, "2025-07-11 00:00:00"), + rows(826957, "2025-08-10 00:00:00"), + rows(719758, "2025-09-09 00:00:00"), + rows(643042, "2025-10-09 00:00:00")); + } + + @Test + public void testPacketsTransferredOverTime() throws IOException { + String query = + String.format("source=%s | STATS sum(packets) by span(`start`, 30d)", VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("sum(packets)", null, "bigint"), + schema("span(`start`,30d)", null, "timestamp")); + verifyDataRows( + response, + rows(360, "2025-04-12 00:00:00"), + rows(1715, "2025-05-12 00:00:00"), + rows(1396, "2025-06-11 00:00:00"), + rows(804, "2025-07-11 00:00:00"), + rows(941, "2025-08-10 00:00:00"), + rows(890, "2025-09-09 00:00:00"), + rows(709, "2025-10-09 00:00:00")); + } + + @Test + public void testTopDestinationByBytes() throws IOException { + String query = + String.format( + "source=%s | stats sum(bytes) as Bytes by dstaddr | sort - Bytes | head 10", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Bytes", null, "bigint"), schema("dstaddr", null, "string")); + verifyDataRows( + response, + rows(267655, "10.0.113.54"), + rows(259776, "11.111.108.48"), + rows(214512, "223.252.77.226"), + rows(210396, "10.0.194.75"), + rows(192355, "10.0.11.144"), + rows(187200, "120.67.35.74"), + rows(183353, "10.0.167.74"), + rows(182055, "10.0.74.110"), + rows(176391, "10.0.3.220"), + rows(175820, "10.0.83.167")); + } + + @Test + public void testTopTalkersByBytes() throws IOException { + String query = + String.format( + "source=%s | stats sum(bytes) as Bytes by srcaddr | sort - Bytes | head 10", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Bytes", null, "bigint"), schema("srcaddr", null, "string")); + verifyDataRows( + response, + rows(267655, "121.65.198.154"), + rows(259776, "10.0.91.27"), + rows(214512, "10.0.165.194"), + rows(210396, "6.186.106.13"), + rows(192355, "182.53.30.77"), + rows(187200, "10.0.163.249"), + rows(183353, "30.193.135.22"), + rows(182055, "213.227.231.57"), + rows(176391, "39.40.182.87"), + rows(175820, "10.0.14.9")); + } + + @Test + public void testTopTalkersByPackets() throws IOException { + String query = + String.format( + "source=%s | stats sum(packets) as Packets by srcaddr | sort - Packets | head 10", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Packets", null, "bigint"), schema("srcaddr", null, "string")); + verifyDataRows( + response, + rows(200, "10.0.163.249"), + rows(199, "121.65.198.154"), + rows(198, "10.0.91.27"), + rows(197, "6.186.106.13"), + rows(181, "115.27.64.3"), + rows(181, "30.193.135.22"), + rows(176, "10.0.227.35"), + rows(174, "10.0.99.147"), + 
rows(171, "10.0.231.176"), + rows(164, "10.0.165.194")); + } + + @Test + public void testTopDestinationsByPackets() throws IOException { + String query = + String.format( + "source=%s | stats sum(packets) as Packets by dstaddr | sort - Packets | head 10", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Packets", null, "bigint"), schema("dstaddr", null, "string")); + verifyDataRows( + response, + rows(200, "120.67.35.74"), + rows(199, "10.0.113.54"), + rows(198, "11.111.108.48"), + rows(197, "10.0.194.75"), + rows(181, "10.0.167.74"), + rows(181, "10.0.159.18"), + rows(176, "10.0.62.137"), + rows(174, "182.58.134.190"), + rows(171, "34.55.235.91"), + rows(164, "118.124.149.78")); + } + + @Test + public void testTopTalkersByIPs() throws IOException { + String query = + String.format( + "source=%s | STATS count() as Count by srcaddr | SORT - Count | HEAD 10", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("srcaddr", null, "string")); + verifyDataRows( + response, + rows(1, "1.24.59.183"), + rows(1, "10.0.101.123"), + rows(1, "10.0.107.121"), + rows(1, "10.0.107.130"), + rows(1, "10.0.108.29"), + rows(1, "10.0.115.237"), + rows(1, "10.0.117.121"), + rows(1, "10.0.126.80"), + rows(1, "10.0.13.162"), + rows(1, "10.0.132.168")); + } + + @Test + public void testTopDestinationsByIPs() throws IOException { + String query = + String.format( + "source=%s | stats count() as Requests by dstaddr | sort - Requests | head 10", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema(response, schema("Requests", null, "bigint"), schema("dstaddr", null, "string")); + verifyDataRows( + response, + rows(1, "10.0.100.62"), + rows(1, "10.0.107.6"), + rows(1, "10.0.109.2"), + rows(1, "10.0.11.144"), + rows(1, "10.0.113.54"), + rows(1, "10.0.116.210"), + rows(1, "10.0.118.54"), + rows(1, "10.0.127.142"), + rows(1, "10.0.138.175"), + rows(1, "10.0.147.33")); + } + + @Test + public void testTopTalkersByHeatMap() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by dstaddr, srcaddr | sort - Count | head 100", + VPC_FLOW_LOGS_INDEX); + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", null, "bigint"), + schema("dstaddr", null, "string"), + schema("srcaddr", null, "string")); + assertEquals(100, response.getJSONArray("datarows").length()); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java new file mode 100644 index 00000000000..a52f7d84b65 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/WafPplDashboardIT.java @@ -0,0 +1,200 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.dashboard; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.legacy.TestUtils; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration tests for WAF PPL dashboard queries. 
These tests ensure that WAF-related PPL queries + * work correctly with actual test data. + */ +public class WafPplDashboardIT extends PPLIntegTestCase { + + private static final String WAF_LOGS_INDEX = "waf_logs"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadWafLogsIndex(); + } + + private void loadWafLogsIndex() throws IOException { + if (!TestUtils.isIndexExist(client(), WAF_LOGS_INDEX)) { + String mapping = TestUtils.getMappingFile("mappings/waf_logs_index_mapping.json"); + TestUtils.createIndexByRestClient(client(), WAF_LOGS_INDEX, mapping); + TestUtils.loadDataByRestClient( + client(), + WAF_LOGS_INDEX, + "src/test/java/org/opensearch/sql/ppl/dashboard/testdata/waf_logs.json"); + } + } + + @Test + public void testTotalRequests() throws IOException { + String query = String.format("source=%s | stats count()", WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("count()", null, "bigint")); + verifyDataRows(response, rows(100)); + } + + @Test + public void testRequestsHistory() throws IOException { + String query = + String.format( + "source=%s | STATS count() as Count by span(start_time, 30d), action | SORT - Count", + WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, + schema("Count", null, "bigint"), + schema("span(start_time,30d)", null, "timestamp"), + schema("action", null, "string")); + } + + @Test + public void testRequestsToWebACLs() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `webaclId` | sort - Count | head 10", + WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("webaclId", null, "string")); + } + + @Test + public void testSources() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `httpSourceId` | sort - Count | head 5", + WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("Count", null, "bigint"), schema("httpSourceId", null, "string")); + } + + @Test + public void testTopClientIPs() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `httpRequest.clientIp` | sort - Count |" + + " head 10", + WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, schema("Count", null, "bigint"), schema("httpRequest.clientIp", null, "string")); + } + + @Test + public void testTopCountries() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `httpRequest.country` | sort - Count ", + WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, schema("Count", null, "bigint"), schema("httpRequest.country", null, "string")); + verifyDataRows( + response, + rows(33, "US"), + rows(8, "GB"), + rows(7, "DE"), + rows(7, "BR"), + rows(6, "CA"), + rows(5, "RU"), + rows(3, "JP"), + rows(3, "IN"), + rows(3, "CN"), + rows(3, "BE"), + rows(2, "SG"), + rows(2, "SE"), + rows(2, "MX"), + rows(2, "IE"), + rows(2, "ES"), + rows(2, "CH"), + rows(2, "AU"), + rows(1, "ZA"), + rows(1, "PT"), + rows(1, "NL"), + rows(1, "IT"), + rows(1, "FR"), + rows(1, "FI"), + rows(1, "CL"), + rows(1, "AT")); + } + + @Test + public void testTopTerminatingRules() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `terminatingRuleId` | sort - Count | head 10", + WAF_LOGS_INDEX); + + 
JSONObject response = executeQuery(query); + verifySchema( + response, schema("Count", null, "bigint"), schema("terminatingRuleId", null, "string")); + verifyDataRows( + response, + rows(13, "AWS-AWSManagedRulesAmazonIpReputationList"), + rows(11, "XSSProtectionRule"), + rows(11, "Default_Action"), + rows(10, "AWS-AWSManagedRulesKnownBadInputsRuleSet"), + rows(8, "CustomRateLimitRule"), + rows(8, "AWS-AWSManagedRulesCommonRuleSet"), + rows(7, "CustomIPWhitelistRule"), + rows(7, "AWS-AWSManagedRulesSQLiRuleSet"), + rows(7, "AWS-AWSManagedRulesLinuxRuleSet"), + rows(5, "CSRFProtectionRule")); + } + + @Test + public void testTopRequestURIs() throws IOException { + String query = + String.format( + "source=%s | stats count() as Count by `httpRequest.uri` | sort - Count | head 10", + WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema( + response, schema("Count", null, "bigint"), schema("httpRequest.uri", null, "string")); + verifyDataRows( + response, + rows(5, "/api/v2/search"), + rows(5, "/account"), + rows(4, "/products"), + rows(4, "/css/style.css"), + rows(3, "/test"), + rows(3, "/download"), + rows(3, "/docs"), + rows(3, "/billing"), + rows(3, "/api/v2/users"), + rows(2, "/about")); + } + + @Test + public void testTotalBlockedRequests() throws IOException { + String query = + String.format("source=%s | WHERE action = \\\"BLOCK\\\" | STATS count()", WAF_LOGS_INDEX); + + JSONObject response = executeQuery(query); + verifySchema(response, schema("count()", null, "bigint")); + verifyDataRows(response, rows(21)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/cloudtrail_logs_index_mapping.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/cloudtrail_logs_index_mapping.json new file mode 100644 index 00000000000..04ea80f9bb9 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/cloudtrail_logs_index_mapping.json @@ -0,0 +1,140 @@ +{ + "mappings": { + "properties": { + "eventVersion": { + "type": "keyword" + }, + "userIdentity": { + "properties": { + "type": { + "type": "keyword" + }, + "principalId": { + "type": "keyword" + }, + "arn": { + "type": "keyword" + }, + "accountId": { + "type": "keyword" + }, + "userName": { + "type": "keyword" + }, + "sessionContext": { + "properties": { + "sessionIssuer": { + "properties": { + "type": { + "type": "keyword" + }, + "principalId": { + "type": "keyword" + }, + "arn": { + "type": "keyword" + }, + "accountId": { + "type": "keyword" + }, + "userName": { + "type": "keyword" + } + } + }, + "webIdFederationData": { + "type": "object" + }, + "attributes": { + "properties": { + "mfaAuthenticated": { + "type": "keyword" + }, + "creationDate": { + "type": "date" + } + } + } + } + } + } + }, + "eventTime": { + "type": "date" + }, + "start_time": { + "type": "date" + }, + "eventSource": { + "type": "keyword" + }, + "eventName": { + "type": "keyword" + }, + "awsRegion": { + "type": "keyword" + }, + "sourceIPAddress": { + "type": "keyword" + }, + "userAgent": { + "type": "keyword" + }, + "requestParameters": { + "properties": { + "roleArn": { + "type": "keyword" + }, + "roleSessionName": { + "type": "keyword" + }, + "keyId": { + "type": "keyword" + }, + "encryptionContext": { + "type": "object" + }, + "bucketName": { + "type": "keyword" + }, + "Host": { + "type": "keyword" + }, + "x-amz-expected-bucket-owner": { + "type": "keyword" + }, + "logGroupName": { + "type": "keyword" + } + } + }, + "responseElements": { + "type": "object" + }, + 
"requestID": { + "type": "keyword" + }, + "eventID": { + "type": "keyword" + }, + "readOnly": { + "type": "boolean" + }, + "eventType": { + "type": "keyword" + }, + "managementEvent": { + "type": "boolean" + }, + "recipientAccountId": { + "type": "keyword" + }, + "eventCategory": { + "type": "keyword" + }, + "errorCode": { + "type": "keyword" + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/nfw_logs_index_mapping.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/nfw_logs_index_mapping.json new file mode 100644 index 00000000000..df44c3152c0 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/nfw_logs_index_mapping.json @@ -0,0 +1,117 @@ +{ + "mappings": { + "properties": { + "firewall_name": { + "type": "keyword" + }, + "availability_zone": { + "type": "keyword" + }, + "event_timestamp": { + "type": "keyword" + }, + "event": { + "properties": { + "timestamp": { + "type": "date" + }, + "src_ip": { + "type": "keyword" + }, + "dest_ip": { + "type": "keyword" + }, + "src_port": { + "type": "integer" + }, + "dest_port": { + "type": "integer" + }, + "proto": { + "type": "keyword" + }, + "app_proto": { + "type": "keyword" + }, + "event_type": { + "type": "keyword" + }, + "flow_id": { + "type": "long" + }, + "netflow": { + "properties": { + "pkts": { + "type": "integer" + }, + "bytes": { + "type": "integer" + }, + "start": { + "type": "date" + }, + "end": { + "type": "date" + }, + "age": { + "type": "integer" + }, + "min_ttl": { + "type": "integer" + }, + "max_ttl": { + "type": "integer" + } + } + }, + "tcp": { + "properties": { + "tcp_flags": { + "type": "keyword" + }, + "syn": { + "type": "boolean" + }, + "ack": { + "type": "boolean" + } + } + }, + "tls": { + "properties": { + "sni": { + "type": "keyword" + } + } + }, + "http": { + "properties": { + "hostname": { + "type": "keyword" + }, + "url": { + "type": "keyword" + }, + "http_user_agent": { + "type": "keyword" + } + } + }, + "alert": { + "properties": { + "action": { + "type": "keyword" + }, + "signature_id": { + "type": "integer" + }, + "signature": { + "type": "keyword" + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/vpc_logs_index_mapping.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/vpc_logs_index_mapping.json new file mode 100644 index 00000000000..d3c4545fb22 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/vpc_logs_index_mapping.json @@ -0,0 +1,72 @@ +{ + "mappings": { + "properties": { + "version": { + "type": "keyword" + }, + "account-id": { + "type": "keyword" + }, + "interface-id": { + "type": "keyword" + }, + "region": { + "type": "keyword" + }, + "vpc-id": { + "type": "keyword" + }, + "subnet-id": { + "type": "keyword" + }, + "az-id": { + "type": "keyword" + }, + "instance-id": { + "type": "keyword" + }, + "srcaddr": { + "type": "keyword" + }, + "dstaddr": { + "type": "keyword" + }, + "srcport": { + "type": "integer" + }, + "dstport": { + "type": "integer" + }, + "protocol": { + "type": "keyword" + }, + "packets": { + "type": "long" + }, + "bytes": { + "type": "long" + }, + "pkt-src-aws-service": { + "type": "keyword" + }, + "pkt-dst-aws-service": { + "type": "keyword" + }, + "flow-direction": { + "type": "keyword" + }, + "start": { + "type": "date" + }, + "end": { + "type": "date" + }, + "action": { + "type": "keyword" + }, + "log-status": { 
+ "type": "keyword" + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/waf_logs_index_mapping.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/waf_logs_index_mapping.json new file mode 100644 index 00000000000..48cc541eab3 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/mappings/waf_logs_index_mapping.json @@ -0,0 +1,104 @@ +{ + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "start_time": { + "type": "date" + }, + "timestamp": { + "type": "long" + }, + "webaclId": { + "type": "keyword" + }, + "action": { + "type": "keyword" + }, + "formatVersion": { + "type": "integer" + }, + "httpSourceName": { + "type": "keyword" + }, + "httpRequest": { + "properties": { + "clientIp": { + "type": "keyword" + }, + "country": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + }, + "httpMethod": { + "type": "keyword" + } + } + }, + "httpSourceId": { + "type": "keyword" + }, + "terminatingRuleId": { + "type": "keyword" + }, + "terminatingRuleType": { + "type": "keyword" + }, + "ruleGroupList.ruleId": { + "type": "keyword" + }, + "aws": { + "properties": { + "waf": { + "properties": { + "webaclId": { + "type": "keyword" + }, + "action": { + "type": "keyword" + }, + "httpRequest": { + "properties": { + "clientIp": { + "type": "keyword" + }, + "country": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + }, + "httpMethod": { + "type": "keyword" + } + } + }, + "httpSourceId": { + "type": "keyword" + }, + "terminatingRuleId": { + "type": "keyword" + }, + "RuleType": { + "type": "keyword" + }, + "ruleGroupList": { + "properties": { + "ruleId": { + "type": "keyword" + } + } + }, + "event_count": { + "type": "long" + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/cloudtrail.rst b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/cloudtrail.rst new file mode 100644 index 00000000000..dde961e75ee --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/cloudtrail.rst @@ -0,0 +1,294 @@ +.. + Copyright OpenSearch Contributors + SPDX-License-Identifier: Apache-2.0 + +============================ +CloudTrail Dashboard Queries +============================ + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Description +=========== + +CloudTrail PPL queries analyze AWS API activity, user behavior, and security events. These queries demonstrate common dashboard patterns for CloudTrail log analysis. + +.. note:: + Some queries may return results in different orders when multiple records have equal values. This is expected database behavior and does not affect the correctness of the results. For queries with non-deterministic ordering, sample outputs may not be shown, but the query structure and schema remain valid. + +Event Analysis +============== + +Total Events Count +------------------ + +Basic count aggregation for total events. + +PPL query:: + + os> source=cloudtrail_logs | stats count() as `Event Count`; + fetched rows / total rows = 1/1 + +-------------+ + | Event Count | + |-------------| + | 100 | + +-------------+ + +Events Over Time +---------------- + +Count by timestamp for event history. 
+
+PPL query::
+
+    os> source=cloudtrail_logs | stats count() by span(start_time, 30d);
+    fetched rows / total rows = 1/1
+    +----------+------------------------+
+    | count()  | span(start_time,30d)   |
+    |----------|------------------------|
+    | 100      | 2025-05-01 00:00:00    |
+    +----------+------------------------+
+
+Events by Account IDs
+---------------------
+
+Account-based event aggregation with null filtering.
+
+PPL query::
+
+    os> source=cloudtrail_logs | where isnotnull(userIdentity.accountId) | stats count() as Count by userIdentity.accountId | sort - Count | head 10;
+    fetched rows / total rows = 10/10
+    +-------+-------------------------+
+    | Count | userIdentity.accountId  |
+    |-------|-------------------------|
+    | 1     | 598715677952            |
+    | 1     | 287645373404            |
+    | 1     | 210622981215            |
+    | 1     | 343123305904            |
+    | 1     | 774538043323            |
+    | 1     | 190225759807            |
+    | 1     | 999658550876            |
+    | 1     | 837288668719            |
+    | 1     | 689811372963            |
+    | 1     | 585894403030            |
+    +-------+-------------------------+
+
+Events by Category
+------------------
+
+Event category analysis with sorting.
+
+PPL query::
+
+    os> source=cloudtrail_logs | stats count() as Count by eventCategory | sort - Count | head 5;
+    fetched rows / total rows = 1/1
+    +-------+---------------+
+    | Count | eventCategory |
+    |-------|---------------|
+    | 100   | Management    |
+    +-------+---------------+
+
+Service Analysis
+================
+
+Top 10 Event APIs
+-----------------
+
+Most frequently called API operations.
+
+PPL query::
+
+    os> source=cloudtrail_logs | stats count() as Count by `eventName` | sort - Count | head 10;
+    fetched rows / total rows = 10/10
+    +-------+---------------------------+
+    | Count | eventName                 |
+    |-------|---------------------------|
+    | 8     | InvokeFunction            |
+    | 6     | GetItem                   |
+    | 5     | DescribeImages            |
+    | 4     | GetCallerIdentity         |
+    | 4     | DescribeSecurityGroups    |
+    | 4     | CreateSecurityGroup       |
+    | 3     | PutItem                   |
+    | 3     | ModifyDBInstance          |
+    | 3     | GetObject                 |
+    | 3     | DeleteDBInstance          |
+    +-------+---------------------------+
+
+Top 10 Services
+---------------
+
+Most active AWS services.
+
+PPL query::
+
+    os> source=cloudtrail_logs | stats count() as Count by `eventSource` | sort - Count | head 10;
+    fetched rows / total rows = 10/10
+    +-------+------------------------------+
+    | Count | eventSource                  |
+    |-------|------------------------------|
+    | 15    | ec2.amazonaws.com            |
+    | 14    | s3.amazonaws.com             |
+    | 13    | rds.amazonaws.com            |
+    | 10    | dynamodb.amazonaws.com       |
+    | 9     | cloudwatch.amazonaws.com     |
+    | 8     | sts.amazonaws.com            |
+    | 8     | lambda.amazonaws.com         |
+    | 8     | iam.amazonaws.com            |
+    | 8     | cloudformation.amazonaws.com |
+    | 7     | logs.amazonaws.com           |
+    +-------+------------------------------+
+
+Security Analysis
+=================
+
+Top 10 Source IPs
+-----------------
+
+Source IP analysis excluding Amazon internal IPs.
+
+PPL query::
+
+    os> source=cloudtrail_logs | WHERE NOT (sourceIPAddress LIKE '%amazon%.com%') | STATS count() as Count by sourceIPAddress | SORT - Count | HEAD 10;
+    fetched rows / total rows = 10/10
+    +-------+-----------------+
+    | Count | sourceIPAddress |
+    |-------|-----------------|
+    | 1     | 116.142.58.92   |
+    | 1     | 140.38.65.165   |
+    | 1     | 58.138.87.219   |
+    | 1     | 180.3.121.23    |
+    | 1     | 207.28.12.237   |
+    | 1     | 210.84.80.238   |
+    | 1     | 222.105.156.190 |
+    | 1     | 104.201.253.14  |
+    | 1     | 207.220.131.48  |
+    | 1     | 190.240.94.208  |
+    +-------+-----------------+
+
+Top 10 Users Generating Events
+------------------------------
+
+Complex user analysis with multiple fields.
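+
+Grouping by several fields produces one row per distinct combination, and rename maps the
+backtick-quoted nested field names to display labels. A minimal two-field sketch of the
+same pattern, for illustration only, would be::
+
+    source=cloudtrail_logs | stats count() as Count by `userIdentity.accountId`, `awsRegion` | sort - Count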
+ +PPL query:: + + os> source=cloudtrail_logs | where ISNOTNULL(`userIdentity.accountId`) | STATS count() as Count by `userIdentity.sessionContext.sessionIssuer.userName`, `userIdentity.accountId`, `userIdentity.sessionContext.sessionIssuer.type` | rename `userIdentity.sessionContext.sessionIssuer.userName` as `User Name`, `userIdentity.accountId` as `Account Id`, `userIdentity.sessionContext.sessionIssuer.type` as `Type` | SORT - Count | HEAD 1000; + fetched rows / total rows = 20/20 + +-------+------------------+--------------+-------------+ + | Count | User Name | Account Id | Type | + |-------|------------------|--------------|-------------| + | 8 | DataEngineer | 190225759807 | Role | + | 6 | SecurityTeam | 287645373404 | Role | + | 5 | SystemAdmin | 585894403030 | Role | + | 4 | DevOps | 689811372963 | Role | + | 4 | CloudOps | 343123305904 | Role | + | 3 | IAMManager | 774538043323 | Role | + | 3 | AppDeveloper | 999658550876 | Role | + | 3 | NetworkAdmin | 837288668719 | Role | + | 2 | DatabaseAdmin | 598715677952 | Role | + | 2 | ServiceAccount | 210622981215 | Role | + +-------+------------------+--------------+-------------+ + +Regional Analysis +================= + +Events by Region +---------------- + +Event distribution across AWS regions. + +PPL query:: + + os> source=cloudtrail_logs | stats count() as Count by `awsRegion` | sort - Count | head 10; + fetched rows / total rows = 10/10 + +-------+----------------+ + | Count | awsRegion | + |-------|----------------| + | 12 | us-west-1 | + | 12 | ca-central-1 | + | 9 | us-west-2 | + | 8 | ap-southeast-1 | + | 8 | ap-northeast-1 | + | 7 | us-east-2 | + | 7 | sa-east-1 | + | 7 | eu-north-1 | + | 7 | ap-south-1 | + | 6 | ap-southeast-2 | + +-------+----------------+ + +EC2 Analysis +============ + +EC2 Change Event Count +---------------------- + +EC2 instance lifecycle events (Run, Stop, Terminate). + +PPL query:: + + os> source=cloudtrail_logs | where eventSource like "ec2%" and (eventName = "RunInstances" or eventName = "TerminateInstances" or eventName = "StopInstances") and not (eventName like "Get%" or eventName like "Describe%" or eventName like "List%" or eventName like "Head%") | stats count() as Count by eventName | sort - Count | head 5; + fetched rows / total rows = 2/2 + +-------+-------------------+ + | Count | eventName | + |-------|-------------------| + | 1 | TerminateInstances| + | 1 | RunInstances | + +-------+-------------------+ + +EC2 Users by Session Issuer +--------------------------- + +Users performing EC2 operations. + +PPL query:: + + os> source=cloudtrail_logs | where isnotnull(`userIdentity.sessionContext.sessionIssuer.userName`) and `eventSource` like 'ec2%' and not (`eventName` like 'Get%' or `eventName` like 'Describe%' or `eventName` like 'List%' or `eventName` like 'Head%') | stats count() as Count by `userIdentity.sessionContext.sessionIssuer.userName` | sort - Count | head 10; + fetched rows / total rows = 4/4 + +-------+----------------------------------------------------+ + | Count | userIdentity.sessionContext.sessionIssuer.userName | + |-------|----------------------------------------------------| + | 1 | Analyst | + | 1 | DataEngineer | + | 1 | ec2-service | + | 1 | | + +-------+----------------------------------------------------+ + +EC2 Events by Name +------------------ + +EC2 API operations excluding read-only operations. 
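+
+The not (... like ...) clause is a reusable filter for dropping read-only operations
+(Get*, Describe*, List*, Head*). Applied to another event source, for example S3, an
+illustrative variant would be::
+
+    source=cloudtrail_logs | where `eventSource` like 's3%' and not (`eventName` like 'Get%' or `eventName` like 'List%' or `eventName` like 'Head%') | stats count() as Count by `eventName`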
+ +PPL query:: + + os> source=cloudtrail_logs | where `eventSource` like "ec2%" and not (`eventName` like "Get%" or `eventName` like "Describe%" or `eventName` like "List%" or `eventName` like "Head%") | stats count() as Count by `eventName` | rename `eventName` as `Event Name` | sort - Count | head 10; + fetched rows / total rows = 3/3 + +-------+---------------------+ + | Count | Event Name | + |-------|---------------------| + | 2 | CreateSecurityGroup | + | 1 | RunInstances | + | 1 | TerminateInstances | + +-------+---------------------+ + +S3 Analysis +=========== + +S3 Buckets +---------- + +S3 bucket analysis. + +PPL query:: + + os> source=cloudtrail_logs | where `eventSource` like 's3%' and isnotnull(`requestParameters.bucketName`) | stats count() as Count by `requestParameters.bucketName` | sort - Count | head 10; + fetched rows / total rows = 1/1 + +-------+------------------------------------+ + | Count | requestParameters.bucketName | + |-------|------------------------------------| + | 1 | test-cloudtrail-logs-123456789012 | + +-------+------------------------------------+ \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/nfw.rst b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/nfw.rst new file mode 100644 index 00000000000..07c3e3feddc --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/nfw.rst @@ -0,0 +1,600 @@ +.. + Copyright OpenSearch Contributors + SPDX-License-Identifier: Apache-2.0 + +================================== +Network Firewall Dashboard Queries +================================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Description +=========== + +Network Firewall PPL queries analyze network traffic patterns, security events, and flow characteristics. These queries demonstrate common dashboard patterns for AWS Network Firewall log analysis. + +.. note:: + Some queries may return results in different orders when multiple records have equal values (e.g., count = 1). This is expected database behavior and does not affect the correctness of the results. For queries with non-deterministic ordering, sample outputs may not be shown, but the query structure and schema remain valid. + +Traffic Analysis +================ + +Top Source IP by Packets +------------------------- + +Source IPs generating the most network packets over time. + +.. note:: + Results may vary in order when packet counts are equal. 
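+
+When counts tie, a secondary sort key makes the ordering deterministic; a
+minimal sketch of the tie-breaker pattern (illustrative only, not part of
+the dashboard template)::
+
+    source=nfw_logs | stats sum(`event.netflow.pkts`) as packet_count by `event.src_ip` | sort - packet_count, + `event.src_ip` | head 10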
+ +PPL query:: + + os> source=nfw_logs | stats sum(`event.netflow.pkts`) as packet_count by span(`event.timestamp`, 2d) as timestamp_span, `event.src_ip` | rename `event.src_ip` as `Source IP` | sort - packet_count | head 10; + fetched rows / total rows = 10/10 + +--------------+---------------------+----------------+ + | packet_count | timestamp_span | Source IP | + |--------------|---------------------|----------------| + | 53 | 2025-02-23 00:00:00 | 10.170.18.235 | + | 11 | 2025-02-23 00:00:00 | 8.8.8.8 | + | 11 | 2025-02-23 00:00:00 | 54.242.115.112 | + | 1 | 2025-03-27 00:00:00 | 45.82.78.100 | + | 1 | 2025-03-27 00:00:00 | 20.65.193.116 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.100 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.101 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.102 | + | 0 | 2025-03-27 00:00:00 | 51.158.113.168 | + | 0 | 2025-03-27 00:00:00 | 10.2.1.120 | + +--------------+---------------------+----------------+ + +Top Application Protocols +-------------------------- + +Most common application layer protocols. + +PPL query:: + + os> source=nfw_logs | where isnotnull(`event.app_proto`) | STATS count() as Count by `event.app_proto` | SORT - Count | HEAD 10; + fetched rows / total rows = 4/4 + +-------+-----------------+ + | Count | event.app_proto | + |-------|-----------------| + | 89 | http | + | 5 | unknown | + | 2 | tls | + | 2 | dns | + +-------+-----------------+ + +Protocol Analysis +================= + +Top Protocols +------------- + +Most common network protocols (TCP, UDP, ICMP). + +PPL query:: + + os> source=nfw_logs | STATS count() as Count by `event.proto` | SORT - Count | HEAD 10; + fetched rows / total rows = 3/3 + +-------+-------------+ + | Count | event.proto | + |-------|-------------| + | 95 | TCP | + | 3 | ICMP | + | 2 | UDP | + +-------+-------------+ + +Security Analysis +================= + +Top Blocked Source IPs +----------------------- + +Source IPs with blocked traffic. + +PPL query:: + + os> source=nfw_logs | WHERE `event.alert.action` = "blocked" | STATS COUNT() as Count by `event.src_ip` | SORT - Count | HEAD 10; + fetched rows / total rows = 2/2 + +-------+---------------+ + | Count | event.src_ip | + |-------|---------------| + | 4 | 10.170.18.235 | + | 1 | 10.2.1.120 | + +-------+---------------+ + +Top Blocked Destination IPs +---------------------------- + +Destinations with blocked traffic. + +PPL query:: + + os> source=nfw_logs | WHERE `event.alert.action` = "blocked" | STATS COUNT() as Count by `event.dest_ip` | SORT - Count | HEAD 10; + fetched rows / total rows = 4/4 + +-------+----------------+ + | Count | event.dest_ip | + |-------|----------------| + | 2 | 8.8.8.8 | + | 1 | 54.146.42.172 | + | 1 | 54.242.115.112 | + | 1 | 52.216.211.88 | + +-------+----------------+ + +HTTP Analysis +============= + +Top HTTP Host Headers +--------------------- + +HTTP hostname analysis for allowed traffic. + +PPL query:: + + os> source=nfw_logs | where `event.alert.action` = "allowed" | stats count() as event_count by span(`event.timestamp`, 2d) as time_bucket, `event.http.hostname` | rename `event.http.hostname` as `Hostname` | sort - event_count; + fetched rows / total rows = 1/1 + +-------------+---------------------+----------+ + | event_count | time_bucket | Hostname | + |-------------|---------------------|----------| + | 1 | 2025-03-27 00:00:00 | null | + +-------------+---------------------+----------+ + +Top Blocked HTTP Host Headers +----------------------------- + +HTTP hostname analysis for blocked traffic. 
+
+PPL query::
+
+    os> source=nfw_logs | where `event.alert.action` = "blocked" and isnotnull(`event.http.hostname`) | stats count() as event_count by span(`event.timestamp`, 2d) as time_bucket, `event.http.hostname` | rename `event.http.hostname` as `Hostname` | sort - event_count | HEAD 10;
+    fetched rows / total rows = 1/1
+    +-------------+---------------------+-----------------------+
+    | event_count | time_bucket         | Hostname              |
+    |-------------|---------------------|-----------------------|
+    | 1           | 2025-02-23 00:00:00 | checkip.amazonaws.com |
+    +-------------+---------------------+-----------------------+
+
+Top Allowed TLS SNI
+-------------------
+
+TLS Server Name Indication analysis for allowed traffic.
+
+PPL query::
+
+    os> source=nfw_logs | where `event.alert.action` = "allowed" | stats count() as event_count by span(`event.timestamp`, 2d) as time_bucket, `event.tls.sni` | rename `event.tls.sni` as `Hostname` | sort - event_count | HEAD 10;
+    fetched rows / total rows = 1/1
+    +-------------+---------------------+----------+
+    | event_count | time_bucket         | Hostname |
+    |-------------|---------------------|----------|
+    | 1           | 2025-03-27 00:00:00 | null     |
+    +-------------+---------------------+----------+
+
+Top Blocked TLS SNI
+-------------------
+
+TLS Server Name Indication analysis for blocked traffic.
+
+PPL query::
+
+    os> source=nfw_logs | where `event.alert.action` = "blocked" and isnotnull(`event.tls.sni`) | stats count() as event_count by span(`event.timestamp`, 2d) as time_bucket, `event.tls.sni` | rename `event.tls.sni` as `Hostname` | sort - event_count | HEAD 10;
+    fetched rows / total rows = 2/2
+    +-------------+---------------------+----------------------------+
+    | event_count | time_bucket         | Hostname                   |
+    |-------------|---------------------|----------------------------|
+    | 1           | 2025-02-23 00:00:00 | checkip.amazonaws.com      |
+    | 1           | 2025-03-27 00:00:00 | s3.us-east-1.amazonaws.com |
+    +-------------+---------------------+----------------------------+
+
+Top HTTP URI Paths
+------------------
+
+Most frequently requested URI paths.
+
+PPL query::
+
+    os> source=nfw_logs | where isnotnull(`event.http.url`) | stats count() as event_count by span(`event.timestamp`, 2d) as timestamp_span, `event.http.url` | rename `event.http.url` as `URL` | sort - event_count | head 10;
+    fetched rows / total rows = 1/1
+    +-------------+---------------------+-----+
+    | event_count | timestamp_span      | URL |
+    |-------------|---------------------|-----|
+    | 1           | 2025-02-23 00:00:00 | /   |
+    +-------------+---------------------+-----+
+
+Top HTTP User Agents
+--------------------
+
+Most common HTTP user agents.
+
+PPL query::
+
+    os> source=nfw_logs | where isnotnull(`event.http.http_user_agent`) | stats count() as event_count by span(`event.timestamp`, 2d) as timestamp_span, `event.http.http_user_agent` | rename `event.http.http_user_agent` as `User Agent` | sort - event_count | head 10;
+    fetched rows / total rows = 1/1
+    +-------------+---------------------+------------+
+    | event_count | timestamp_span      | User Agent |
+    |-------------|---------------------|------------|
+    | 1           | 2025-02-23 00:00:00 | curl/8.5.0 |
+    +-------------+---------------------+------------+
+
+Private Link Analysis
+=====================
+
+Top Private Link Endpoint Candidates
+------------------------------------
+
+Identify potential AWS service endpoints for Private Link.
+ +PPL query:: + + os> source=nfw_logs | where (`event.tls.sni` like 's3%') or (`event.http.hostname` like 's3%') or (`event.tls.sni` like 'dynamodb%') or (`event.http.hostname` like 'dynamodb%') or (`event.tls.sni` like 'backup%') or (`event.http.hostname` like 'backup%') | STATS count() as Count by `event.src_ip`, `event.dest_ip`, `event.app_proto`, `event.tls.sni`, `event.http.hostname` | rename `event.tls.sni` as SNI, `event.dest_ip` as Dest_IP, `event.src_ip` as Source_IP, `event.http.hostname` as Hostname, `event.app_proto` as App_Proto | SORT - Count; + fetched rows / total rows = 1/1 + +-------+---------------------------+---------------+------------+----------+-----------+ + | Count | SNI | Dest_IP | Source_IP | Hostname | App_Proto | + |-------|---------------------------|---------------|------------|----------|-----------| + | 1 | s3.us-east-1.amazonaws.com| 52.216.211.88 | 10.2.1.120 | null | tls | + +-------+---------------------------+---------------+------------+----------+-----------+ + +Port Analysis +============= + +Top Source Ports +---------------- + +Most active source ports over time. + +PPL query:: + + os> source=nfw_logs | stats count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_port` | eval `Source Port` = CAST(`event.src_port` AS STRING) | sort - Count | HEAD 10; + fetched rows / total rows = 10/10 + +-------+---------------------+---------------+-------------+ + | Count | timestamp_span | event.src_port| Source Port | + |-------|---------------------|---------------|-------------| + | 1 | 2025-02-23 00:00:00 | 52610 | 52610 | + | 1 | 2025-02-23 00:00:00 | 45550 | 45550 | + | 1 | 2025-02-23 00:00:00 | 33445 | 33445 | + | 1 | 2025-02-23 00:00:00 | 22334 | 22334 | + | 1 | 2025-03-27 00:00:00 | 55123 | 55123 | + | 1 | 2025-03-27 00:00:00 | 44567 | 44567 | + | 1 | 2025-03-27 00:00:00 | 33890 | 33890 | + | 1 | 2025-03-27 00:00:00 | 22445 | 22445 | + | 1 | 2025-03-27 00:00:00 | 11234 | 11234 | + | 1 | 2025-03-27 00:00:00 | 9876 | 9876 | + +-------+---------------------+---------------+-------------+ + +Top Destination Ports +--------------------- + +Most active destination ports over time. + +PPL query:: + + os> source=nfw_logs | stats count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.dest_port` | eval `Destination Port` = CAST(`event.dest_port` AS STRING) | sort - Count | HEAD 10; + fetched rows / total rows = 10/10 + +-------+---------------------+----------------+------------------+ + | Count | timestamp_span | event.dest_port| Destination Port | + |-------|---------------------|----------------|------------------| + | 15 | 2025-02-23 00:00:00 | 80 | 80 | + | 12 | 2025-02-23 00:00:00 | 443 | 443 | + | 8 | 2025-02-23 00:00:00 | 53 | 53 | + | 5 | 2025-03-27 00:00:00 | 8085 | 8085 | + | 3 | 2025-03-27 00:00:00 | 1433 | 1433 | + | 2 | 2025-03-27 00:00:00 | 22 | 22 | + | 2 | 2025-03-27 00:00:00 | 3389 | 3389 | + | 1 | 2025-03-27 00:00:00 | 5900 | 5900 | + | 1 | 2025-03-27 00:00:00 | 993 | 993 | + | 1 | 2025-03-27 00:00:00 | 995 | 995 | + +-------+---------------------+----------------+------------------+ + +TCP Flow Analysis +================= + +Top TCP Flows +------------- + +Most active TCP connections. 
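+
+The flow queries below build a single connection label with eval and CONCAT;
+because the port fields are numeric, they are first converted with
+CAST(... AS STRING). A minimal standalone sketch of the labelling step
+(illustrative only, not part of the dashboard template)::
+
+    source=nfw_logs | eval flow = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | fields flow | head 5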
+ +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = "TCP" | STATS count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10; + fetched rows / total rows = 10/10 + +-------+---------------------+---------------+----------------+----------------+------------------------------------+ + | Count | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |-------|---------------------|---------------|----------------|----------------|------------------------------------| + | 1 | 2025-02-23 00:00:00 | 10.170.18.235 | 8.8.8.8 | 80 | 10.170.18.235 - 8.8.8.8: 80 | + | 1 | 2025-02-23 00:00:00 | 10.170.18.235 | 54.242.115.112 | 443 | 10.170.18.235 - 54.242.115.112: 443| + | 1 | 2025-03-27 00:00:00 | 45.82.78.100 | 10.2.1.120 | 8085 | 45.82.78.100 - 10.2.1.120: 8085 | + | 1 | 2025-03-27 00:00:00 | 20.65.193.116 | 10.2.1.120 | 1433 | 20.65.193.116 - 10.2.1.120: 1433 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.100 | 192.168.1.10 | 22 | 172.16.0.100 - 192.168.1.10: 22 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.101 | 192.168.1.11 | 3389 | 172.16.0.101 - 192.168.1.11: 3389 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.102 | 192.168.1.12 | 5900 | 172.16.0.102 - 192.168.1.12: 5900 | + | 1 | 2025-03-27 00:00:00 | 10.0.1.50 | 203.0.113.100 | 993 | 10.0.1.50 - 203.0.113.100: 993 | + | 1 | 2025-03-27 00:00:00 | 10.0.1.51 | 203.0.113.101 | 995 | 10.0.1.51 - 203.0.113.101: 995 | + | 1 | 2025-03-27 00:00:00 | 10.0.1.52 | 203.0.113.102 | 25 | 10.0.1.52 - 203.0.113.102: 25 | + +-------+---------------------+---------------+----------------+----------------+------------------------------------+ + +Top TCP Flows by Packets +------------------------ + +TCP connections with highest packet counts. 
+ +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = "TCP" | STATS sum(`event.netflow.pkts`) as Packets by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Packets | HEAD 10; + fetched rows / total rows = 10/10 + +---------+---------------------+---------------+----------------+----------------+------------------------------------+ + | Packets | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |---------|---------------------|---------------|----------------|----------------|------------------------------------| + | 53 | 2025-02-23 00:00:00 | 10.170.18.235 | 8.8.8.8 | 80 | 10.170.18.235 - 8.8.8.8: 80 | + | 11 | 2025-02-23 00:00:00 | 10.170.18.235 | 54.242.115.112 | 443 | 10.170.18.235 - 54.242.115.112: 443| + | 5 | 2025-03-27 00:00:00 | 45.82.78.100 | 10.2.1.120 | 8085 | 45.82.78.100 - 10.2.1.120: 8085 | + | 3 | 2025-03-27 00:00:00 | 20.65.193.116 | 10.2.1.120 | 1433 | 20.65.193.116 - 10.2.1.120: 1433 | + | 2 | 2025-03-27 00:00:00 | 172.16.0.100 | 192.168.1.10 | 22 | 172.16.0.100 - 192.168.1.10: 22 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.101 | 192.168.1.11 | 3389 | 172.16.0.101 - 192.168.1.11: 3389 | + | 1 | 2025-03-27 00:00:00 | 172.16.0.102 | 192.168.1.12 | 5900 | 172.16.0.102 - 192.168.1.12: 5900 | + | 1 | 2025-03-27 00:00:00 | 10.0.1.50 | 203.0.113.100 | 993 | 10.0.1.50 - 203.0.113.100: 993 | + | 1 | 2025-03-27 00:00:00 | 10.0.1.51 | 203.0.113.101 | 995 | 10.0.1.51 - 203.0.113.101: 995 | + | 1 | 2025-03-27 00:00:00 | 10.0.1.52 | 203.0.113.102 | 25 | 10.0.1.52 - 203.0.113.102: 25 | + +---------+---------------------+---------------+----------------+----------------+------------------------------------+ + +Top TCP Flows by Bytes +---------------------- + +TCP connections with highest byte counts. 
+ +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = "TCP" | STATS sum(event.netflow.bytes) as Bytes by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Bytes | HEAD 10; + fetched rows / total rows = 10/10 + +-------+---------------------+---------------+----------------+----------------+------------------------------------+ + | Bytes | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |-------|---------------------|---------------|----------------|----------------|------------------------------------| + | 15420 | 2025-02-23 00:00:00 | 10.170.18.235 | 8.8.8.8 | 80 | 10.170.18.235 - 8.8.8.8: 80 | + | 8950 | 2025-02-23 00:00:00 | 10.170.18.235 | 54.242.115.112 | 443 | 10.170.18.235 - 54.242.115.112: 443| + | 2048 | 2025-03-27 00:00:00 | 45.82.78.100 | 10.2.1.120 | 8085 | 45.82.78.100 - 10.2.1.120: 8085 | + | 1536 | 2025-03-27 00:00:00 | 20.65.193.116 | 10.2.1.120 | 1433 | 20.65.193.116 - 10.2.1.120: 1433 | + | 1024 | 2025-03-27 00:00:00 | 172.16.0.100 | 192.168.1.10 | 22 | 172.16.0.100 - 192.168.1.10: 22 | + | 512 | 2025-03-27 00:00:00 | 172.16.0.101 | 192.168.1.11 | 3389 | 172.16.0.101 - 192.168.1.11: 3389 | + | 256 | 2025-03-27 00:00:00 | 172.16.0.102 | 192.168.1.12 | 5900 | 172.16.0.102 - 192.168.1.12: 5900 | + | 128 | 2025-03-27 00:00:00 | 10.0.1.50 | 203.0.113.100 | 993 | 10.0.1.50 - 203.0.113.100: 993 | + | 64 | 2025-03-27 00:00:00 | 10.0.1.51 | 203.0.113.101 | 995 | 10.0.1.51 - 203.0.113.101: 995 | + | 32 | 2025-03-27 00:00:00 | 10.0.1.52 | 203.0.113.102 | 25 | 10.0.1.52 - 203.0.113.102: 25 | + +-------+---------------------+---------------+----------------+----------------+------------------------------------+ + +Top TCP Flags +------------- + +Most common TCP flag combinations. + +PPL query:: + + os> source=nfw_logs | STATS count() as Count by `event.tcp.tcp_flags` | SORT - Count | HEAD 10; + fetched rows / total rows = 10/10 + +-------+---------------------+ + | Count | event.tcp.tcp_flags | + |-------|---------------------| + | 8 | null | + | 4 | 13 | + | 4 | 17 | + | 3 | 0 | + | 3 | 1 | + | 3 | 15 | + | 3 | 16 | + | 3 | 18 | + | 3 | 19 | + | 3 | 2 | + +-------+---------------------+ + +UDP Flow Analysis +================= + +Top UDP Flows +------------- + +Most active UDP connections. 
+ +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = "UDP" | STATS count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10; + fetched rows / total rows = 2/2 + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + | Count | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |-------|---------------------|---------------|----------------|----------------|---------------------------------| + | 1 | 2025-02-23 00:00:00 | 10.0.2.100 | 8.8.8.8 | 53 | 10.0.2.100 - 8.8.8.8: 53 | + | 1 | 2025-03-27 00:00:00 | 10.0.2.101 | 1.1.1.1 | 53 | 10.0.2.101 - 1.1.1.1: 53 | + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + +Top UDP Flows by Packets +------------------------ + +UDP connections with highest packet counts. + +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = "UDP" | STATS sum(`event.netflow.pkts`) as Packets by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Packets | HEAD 10; + fetched rows / total rows = 2/2 + +---------+---------------------+---------------+----------------+----------------+---------------------------------+ + | Packets | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |---------|---------------------|---------------|----------------|----------------|---------------------------------| + | 2 | 2025-02-23 00:00:00 | 10.0.2.100 | 8.8.8.8 | 53 | 10.0.2.100 - 8.8.8.8: 53 | + | 1 | 2025-03-27 00:00:00 | 10.0.2.101 | 1.1.1.1 | 53 | 10.0.2.101 - 1.1.1.1: 53 | + +---------+---------------------+---------------+----------------+----------------+---------------------------------+ + +Top UDP Flows by Bytes +---------------------- + +UDP connections with highest byte counts. + +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = "UDP" | STATS sum(`event.netflow.bytes`) as Bytes by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Bytes | HEAD 10; + fetched rows / total rows = 2/2 + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + | Bytes | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |-------|---------------------|---------------|----------------|----------------|---------------------------------| + | 128 | 2025-02-23 00:00:00 | 10.0.2.100 | 8.8.8.8 | 53 | 10.0.2.100 - 8.8.8.8: 53 | + | 64 | 2025-03-27 00:00:00 | 10.0.2.101 | 1.1.1.1 | 53 | 10.0.2.101 - 1.1.1.1: 53 | + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + +ICMP Flow Analysis +================== + +Top ICMP Flows +-------------- + +Most active ICMP connections. 
+ +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = "ICMP" | STATS count() as Count by SPAN(`event.timestamp`, 1d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10; + fetched rows / total rows = 3/3 + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + | Count | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |-------|---------------------|---------------|----------------|----------------|---------------------------------| + | 1 | 2025-02-23 00:00:00 | 10.0.3.100 | 8.8.8.8 | 0 | 10.0.3.100 - 8.8.8.8: 0 | + | 1 | 2025-03-27 00:00:00 | 10.0.3.101 | 1.1.1.1 | 0 | 10.0.3.101 - 1.1.1.1: 0 | + | 1 | 2025-03-27 00:00:00 | 192.168.2.50 | 203.0.113.200 | 0 | 192.168.2.50 - 203.0.113.200: 0 | + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + +Rule Analysis +============= + +Top Drop/Reject Rules +--------------------- + +Most frequently triggered blocking rules. + +PPL query:: + + os> source=nfw_logs | WHERE `event.alert.action` = "blocked" | STATS count() as Count by `event.alert.signature_id`, `event.alert.action`, `event.alert.signature`, `event.proto` | RENAME `event.alert.signature_id` as SID, `event.alert.action` as Action, `event.alert.signature` as Message, `event.proto` as Proto | SORT - Count | HEAD 10; + fetched rows / total rows = 3/3 + +-------+-----+---------+---------------------------+-------+ + | Count | SID | Action | Message | Proto | + |-------|-----|---------|---------------------------|-------| + | 3 | 1 | blocked | Suspicious Traffic Block | TCP | + | 1 | 2 | blocked | Port Scan Detection | TCP | + | 1 | 3 | blocked | Malware Communication | TCP | + +-------+-----+---------+---------------------------+-------+ + +Top Allowed Rules +----------------- + +Most frequently triggered allowing rules. + +PPL query:: + + os> source=nfw_logs | where `event.alert.action` = "allowed" | stats count() as Count by `event.alert.signature_id`, `event.alert.action`, `event.alert.signature`, `event.proto` | rename `event.alert.signature_id` as SID, `event.alert.action` as Action, `event.alert.signature` as Message, `event.proto` as Proto | sort - Count | head 10; + fetched rows / total rows = 1/1 + +-------+-----+---------+---------------------------+-------+ + | Count | SID | Action | Message | Proto | + |-------|-----|---------|---------------------------|-------| + | 1 | 100 | allowed | Standard Web Traffic | TCP | + +-------+-----+---------+---------------------------+-------+ + +Blocked Traffic Analysis +======================== + +Top Blocked Destination Ports +----------------------------- + +Most frequently blocked destination ports. 
+
+PPL query::
+
+    os> source=nfw_logs | WHERE `event.alert.action` = "blocked" | STATS COUNT() as `Count` by `event.dest_port` | EVAL `Destination Port` = CAST(`event.dest_port` as STRING) | SORT - `Count` | HEAD 10;
+    fetched rows / total rows = 4/4
+    +-------+-----------------+------------------+
+    | Count | event.dest_port | Destination Port |
+    |-------|-----------------|------------------|
+    | 2     | 53              | 53               |
+    | 1     | 80              | 80               |
+    | 1     | 443             | 443              |
+    | 1     | 22              | 22               |
+    +-------+-----------------+------------------+
+
+Top Blocked Remote Access Ports
+-------------------------------
+
+Blocked connections to remote access ports.
+
+PPL query::
+
+    os> source=nfw_logs | WHERE `event.alert.action` = "blocked" | STATS count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10;
+    fetched rows / total rows = 4/4
+    +-------+---------------------+---------------+----------------+-----------------+-------------------------------------+
+    | Count | timestamp_span      | event.src_ip  | event.dest_ip  | event.dest_port | Src IP - Dst IP:Port                |
+    |-------|---------------------|---------------|----------------|-----------------|-------------------------------------|
+    | 2     | 2025-02-23 00:00:00 | 10.170.18.235 | 8.8.8.8        | 53              | 10.170.18.235 - 8.8.8.8: 53         |
+    | 1     | 2025-02-23 00:00:00 | 10.170.18.235 | 54.146.42.172  | 80              | 10.170.18.235 - 54.146.42.172: 80   |
+    | 1     | 2025-02-23 00:00:00 | 10.170.18.235 | 54.242.115.112 | 443             | 10.170.18.235 - 54.242.115.112: 443 |
+    | 1     | 2025-03-27 00:00:00 | 10.2.1.120    | 52.216.211.88  | 22              | 10.2.1.120 - 52.216.211.88: 22      |
+    +-------+---------------------+---------------+----------------+-----------------+-------------------------------------+
+
+Top Blocked TCP Flows
+---------------------
+
+Blocked TCP connections.
+
+PPL query::
+
+    os> source=nfw_logs | WHERE `event.alert.action` = 'blocked' and `event.proto` = 'TCP' | STATS count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10;
+    fetched rows / total rows = 3/3
+    +-------+---------------------+---------------+----------------+-----------------+-------------------------------------+
+    | Count | timestamp_span      | event.src_ip  | event.dest_ip  | event.dest_port | Src IP - Dst IP:Port                |
+    |-------|---------------------|---------------|----------------|-----------------|-------------------------------------|
+    | 1     | 2025-02-23 00:00:00 | 10.170.18.235 | 54.146.42.172  | 80              | 10.170.18.235 - 54.146.42.172: 80   |
+    | 1     | 2025-02-23 00:00:00 | 10.170.18.235 | 54.242.115.112 | 443             | 10.170.18.235 - 54.242.115.112: 443 |
+    | 1     | 2025-03-27 00:00:00 | 10.2.1.120    | 52.216.211.88  | 22              | 10.2.1.120 - 52.216.211.88: 22      |
+    +-------+---------------------+---------------+----------------+-----------------+-------------------------------------+
+
+Top Blocked UDP Flows
+---------------------
+
+Blocked UDP connections.
+ +PPL query:: + + os> source=nfw_logs | WHERE `event.alert.action` = 'blocked' and `event.proto` = 'UDP' | STATS count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP - Dst IP:Port` = CONCAT(`event.src_ip`, " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10; + fetched rows / total rows = 1/1 + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + | Count | timestamp_span | event.src_ip | event.dest_ip | event.dest_port| Src IP - Dst IP:Port | + |-------|---------------------|---------------|----------------|----------------|---------------------------------| + | 2 | 2025-02-23 00:00:00 | 10.170.18.235 | 8.8.8.8 | 53 | 10.170.18.235 - 8.8.8.8: 53 | + +-------+---------------------+---------------+----------------+----------------+---------------------------------+ + +Advanced TCP Analysis +===================== + +Top TCP Flows SYN with SYN-ACK +------------------------------ + +TCP connections with SYN and ACK flags. + +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = 'TCP' and `event.tcp.syn` = "true" and `event.tcp.ack` = "true" | STATS count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.src_port`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP:Port - Dst IP:Port` = CONCAT(`event.src_ip`, ": ", CAST(`event.src_port` AS STRING), " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10; + fetched rows / total rows = 2/2 + +-------+---------------------+---------------+---------------+----------------+-----------------+--------------------------------------+ + | Count | timestamp_span | event.src_ip | event.src_port| event.dest_ip | event.dest_port | Src IP:Port - Dst IP:Port | + |-------|---------------------|---------------|---------------|----------------|-----------------|--------------------------------------| + | 1 | 2025-02-23 00:00:00 | 10.170.18.235 | 52610 | 8.8.8.8 | 80 | 10.170.18.235: 52610 - 8.8.8.8: 80 | + | 1 | 2025-03-27 00:00:00 | 45.82.78.100 | 45550 | 10.2.1.120 | 8085 | 45.82.78.100: 45550 - 10.2.1.120: 8085| + +-------+---------------------+---------------+---------------+----------------+-----------------+--------------------------------------+ + +Top Long-Lived TCP Flows +------------------------- + +TCP connections active for extended periods. 
+ +PPL query:: + + os> source=nfw_logs | WHERE `event.proto` = 'TCP' and `event.netflow.age` > 350 | STATS count() as Count by SPAN(`event.timestamp`, 2d) as timestamp_span, `event.src_ip`, `event.src_port`, `event.dest_ip`, `event.dest_port` | EVAL `Src IP:Port - Dst IP:Port` = CONCAT(`event.src_ip`, ": ", CAST(`event.src_port` AS STRING), " - ", `event.dest_ip`, ": ", CAST(`event.dest_port` AS STRING)) | SORT - Count | HEAD 10; + fetched rows / total rows = 2/2 + +-------+---------------------+---------------+---------------+----------------+-----------------+------------------------------------------+ + | Count | timestamp_span | event.src_ip | event.src_port| event.dest_ip | event.dest_port | Src IP:Port - Dst IP:Port | + |-------|---------------------|---------------|---------------|----------------|-----------------|------------------------------------------| + | 1 | 2025-03-27 00:00:00 | 45.82.78.100 | 52610 | 10.2.1.120 | 8085 | 45.82.78.100: 52610 - 10.2.1.120: 8085 | + | 1 | 2025-03-27 00:00:00 | 20.65.193.116 | 45550 | 10.2.1.120 | 1433 | 20.65.193.116: 45550 - 10.2.1.120: 1433 | + +-------+---------------------+---------------+---------------+----------------+-----------------+------------------------------------------+ \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/vpc.rst b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/vpc.rst new file mode 100644 index 00000000000..49b5ede5826 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/vpc.rst @@ -0,0 +1,289 @@ +.. + Copyright OpenSearch Contributors + SPDX-License-Identifier: Apache-2.0 + +=============================== +VPC Flow Logs Dashboard Queries +=============================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Description +=========== + +VPC Flow Logs PPL queries analyze network flow patterns, traffic volume, and AWS service interactions. These queries demonstrate common dashboard patterns for VPC Flow Logs analysis. + +Basic Aggregations +================== + +Total Requests +-------------- + +Basic count aggregation for all flow records. + +PPL query:: + + os> source=vpc_flow_logs | stats count(); + fetched rows / total rows = 1/1 + +----------+ + | count() | + |----------| + | 100 | + +----------+ + +Total Flows by Actions +---------------------- + +Flow distribution by ACCEPT/REJECT actions. + +PPL query:: + + os> source=vpc_flow_logs | STATS count() as Count by action | SORT - Count | HEAD 5; + fetched rows / total rows = 2/2 + +-------+--------+ + | Count | action | + |-------|--------| + | 92 | ACCEPT | + | 8 | REJECT | + +-------+--------+ + +Time-based Analysis +=================== + +Flows Over Time +--------------- + +Flow patterns over time using span functions. + +PPL query:: + + os> source=vpc_flow_logs | STATS count() by span(`start`, 30d); + fetched rows / total rows = 7/7 + +----------+----------------------+ + | count() | span(`start`,30d) | + |----------|----------------------| + | 6 | 2025-04-12 00:00:00 | + | 24 | 2025-05-12 00:00:00 | + | 17 | 2025-06-11 00:00:00 | + | 12 | 2025-07-11 00:00:00 | + | 17 | 2025-08-10 00:00:00 | + | 13 | 2025-09-09 00:00:00 | + | 11 | 2025-10-09 00:00:00 | + +----------+----------------------+ + +Bytes Transferred Over Time +--------------------------- + +Byte transfer trends over time periods. 
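+
+The span unit controls the bucket size, so the same aggregation can be
+produced at other granularities; a minimal sketch with daily buckets
+(illustrative only, not part of the dashboard template)::
+
+    source=vpc_flow_logs | stats sum(bytes) by span(`start`, 1d)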
+ +PPL query:: + + os> source=vpc_flow_logs | STATS sum(bytes) by span(`start`, 30d); + fetched rows / total rows = 7/7 + +------------+----------------------+ + | sum(bytes) | span(`start`,30d) | + |------------|----------------------| + | 385560 | 2025-04-12 00:00:00 | + | 1470623 | 2025-05-12 00:00:00 | + | 1326170 | 2025-06-11 00:00:00 | + | 946422 | 2025-07-11 00:00:00 | + | 826957 | 2025-08-10 00:00:00 | + | 719758 | 2025-09-09 00:00:00 | + | 643042 | 2025-10-09 00:00:00 | + +------------+----------------------+ + +Traffic Analysis +================ + +Top Talkers by Bytes +-------------------- + +Source IPs generating the most traffic by bytes. + +PPL query:: + + os> source=vpc_flow_logs | stats sum(bytes) as Bytes by srcaddr | sort - Bytes | head 10; + fetched rows / total rows = 10/10 + +--------+----------------+ + | Bytes | srcaddr | + |--------|----------------| + | 267655 | 121.65.198.154 | + | 259776 | 10.0.91.27 | + | 214512 | 10.0.165.194 | + | 210396 | 6.186.106.13 | + | 192355 | 182.53.30.77 | + | 187200 | 10.0.163.249 | + | 183353 | 30.193.135.22 | + | 182055 | 213.227.231.57 | + | 176391 | 39.40.182.87 | + | 175820 | 10.0.14.9 | + +--------+----------------+ + +Top Destinations by Bytes +-------------------------- + +Destination IPs receiving the most bytes. + +PPL query:: + + os> source=vpc_flow_logs | stats sum(bytes) as Bytes by dstaddr | sort - Bytes | head 10; + fetched rows / total rows = 10/10 + +--------+----------------+ + | Bytes | dstaddr | + |--------|----------------| + | 267655 | 10.0.113.54 | + | 259776 | 11.111.108.48 | + | 214512 | 223.252.77.226 | + | 210396 | 10.0.194.75 | + | 192355 | 10.0.11.144 | + | 187200 | 120.67.35.74 | + | 183353 | 10.0.167.74 | + | 182055 | 10.0.74.110 | + | 176391 | 10.0.3.220 | + | 175820 | 10.0.83.167 | + +--------+----------------+ + +Packets Transferred Over Time +----------------------------- + +Packet transfer trends over time periods. + +PPL query:: + + os> source=vpc_flow_logs | STATS sum(packets) by span(`start`, 30d); + fetched rows / total rows = 7/7 + +--------------+----------------------+ + | sum(packets) | span(`start`,30d) | + |--------------|----------------------| + | 360 | 2025-04-12 00:00:00 | + | 1715 | 2025-05-12 00:00:00 | + | 1396 | 2025-06-11 00:00:00 | + | 804 | 2025-07-11 00:00:00 | + | 941 | 2025-08-10 00:00:00 | + | 890 | 2025-09-09 00:00:00 | + | 709 | 2025-10-09 00:00:00 | + +--------------+----------------------+ + +Top Talkers by Packets +---------------------- + +Source IPs generating the most packets. + +PPL query:: + + os> source=vpc_flow_logs | stats sum(packets) as Packets by srcaddr | sort - Packets | head 10; + fetched rows / total rows = 10/10 + +---------+----------------+ + | Packets | srcaddr | + |---------|----------------| + | 200 | 10.0.163.249 | + | 199 | 121.65.198.154 | + | 198 | 10.0.91.27 | + | 197 | 6.186.106.13 | + | 181 | 115.27.64.3 | + | 181 | 30.193.135.22 | + | 176 | 10.0.227.35 | + | 174 | 10.0.99.147 | + | 171 | 10.0.231.176 | + | 164 | 10.0.165.194 | + +---------+----------------+ + +Top Destinations by Packets +--------------------------- + +Destination IPs receiving the most packets. 
+ +PPL query:: + + os> source=vpc_flow_logs | stats sum(packets) as Packets by dstaddr | sort - Packets | head 10; + fetched rows / total rows = 10/10 + +---------+----------------+ + | Packets | dstaddr | + |---------|----------------| + | 200 | 120.67.35.74 | + | 199 | 10.0.113.54 | + | 198 | 11.111.108.48 | + | 197 | 10.0.194.75 | + | 181 | 10.0.167.74 | + | 181 | 10.0.159.18 | + | 176 | 10.0.62.137 | + | 174 | 182.58.134.190 | + | 171 | 34.55.235.91 | + | 164 | 118.124.149.78 | + +---------+----------------+ + +Connection Analysis +=================== + +Top Talkers by IP Count +----------------------- + +Source IPs with most connection attempts. + +PPL query:: + + os> source=vpc_flow_logs | STATS count() as Count by srcaddr | SORT - Count | HEAD 10; + fetched rows / total rows = 10/10 + +-------+----------------+ + | Count | srcaddr | + |-------|----------------| + | 1 | 1.24.59.183 | + | 1 | 10.0.101.123 | + | 1 | 10.0.107.121 | + | 1 | 10.0.107.130 | + | 1 | 10.0.108.29 | + | 1 | 10.0.115.237 | + | 1 | 10.0.117.121 | + | 1 | 10.0.126.80 | + | 1 | 10.0.13.162 | + | 1 | 10.0.132.168 | + +-------+----------------+ + +Top Destinations by IP Count +---------------------------- + +Destination IPs with most incoming connections. + +PPL query:: + + os> source=vpc_flow_logs | stats count() as Requests by dstaddr | sort - Requests | head 10; + fetched rows / total rows = 10/10 + +----------+----------------+ + | Requests | dstaddr | + |----------|----------------| + | 1 | 10.0.100.62 | + | 1 | 10.0.107.6 | + | 1 | 10.0.109.2 | + | 1 | 10.0.11.144 | + | 1 | 10.0.113.54 | + | 1 | 10.0.116.210 | + | 1 | 10.0.118.54 | + | 1 | 10.0.127.142 | + | 1 | 10.0.138.175 | + | 1 | 10.0.147.33 | + +----------+----------------+ + +Heat Map Analysis +================= + +Top Talkers Heat Map +-------------------- + +Source-destination IP pair analysis for traffic patterns. + +PPL query:: + + os> source=vpc_flow_logs | stats count() as Count by dstaddr, srcaddr | sort - Count | head 100; + fetched rows / total rows = 100/100 + +-------+----------------+----------------+ + | Count | dstaddr | srcaddr | + |-------|----------------|----------------| + | 1 | 10.0.100.62 | 1.24.59.183 | + | 1 | 10.0.107.6 | 10.0.101.123 | + | ... | ... | ... | + +-------+----------------+----------------+ + diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/waf.rst b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/waf.rst new file mode 100644 index 00000000000..58592d7fd92 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/templates/dashboard/waf.rst @@ -0,0 +1,224 @@ +.. + Copyright OpenSearch Contributors + SPDX-License-Identifier: Apache-2.0 + +========================= +WAF Dashboard PPL Queries +========================= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + +Description +=========== + +WAF PPL queries analyze web traffic patterns, security events, and rule effectiveness. These queries demonstrate common dashboard patterns for AWS WAF log analysis. + +Request Analysis +================ + +Total Requests +-------------- + +Basic count aggregation for all WAF requests. + +PPL query:: + + os> source=waf_logs | stats count(); + fetched rows / total rows = 1/1 + +----------+ + | count() | + |----------| + | 100 | + +----------+ + +Request History +--------------- + +Request patterns over time by action (ALLOW/BLOCK). 
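+
+The same time buckets can also be narrowed to a single action before
+aggregating; a minimal sketch counting only blocked requests (illustrative
+only, not part of the dashboard template)::
+
+    source=waf_logs | where action = "BLOCK" | stats count() as Count by span(start_time, 30d)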
+
+PPL query::
+
+    os> source=waf_logs | STATS count() as Count by span(start_time, 30d), action | SORT - Count;
+    fetched rows / total rows = 3/3
+    +-------+------------------------+--------+
+    | Count | span(start_time,30d)   | action |
+    |-------|------------------------|--------|
+    | 82    | 2025-05-01T00:00:00Z   | ALLOW  |
+    | 17    | 2025-05-01T00:00:00Z   | BLOCK  |
+    | 1     | 2025-05-01T00:00:00Z   | COUNT  |
+    +-------+------------------------+--------+
+
+WebACL Analysis
+===============
+
+Requests to WebACLs
+-------------------
+
+Request distribution across different WebACLs.
+
+PPL query::
+
+    os> source=waf_logs | stats count() as Count by `webaclId` | sort - Count | head 10;
+    fetched rows / total rows = 10/10
+    +-------+----------------------------------------------------------------------------------------+
+    | Count | webaclId                                                                               |
+    |-------|----------------------------------------------------------------------------------------|
+    | 1     | arn:aws:wafv2:us-east-1:784781757088:regional/webacl/APIWAF-lkl/01b29038-23ae-14c5-... |
+    | 1     | arn:aws:wafv2:eu-central-1:250922725343:regional/webacl/SecurityWAF-ngh/018f30a7-...   |
+    | 1     | arn:aws:wafv2:us-west-2:712448542372:regional/webacl/DevWAF-nni/04e6fdf3-14a2-1071-... |
+    | ...   | ...                                                                                    |
+    +-------+----------------------------------------------------------------------------------------+
+
+Sources Analysis
+----------------
+
+Analysis of HTTP source identifiers.
+
+PPL query::
+
+    os> source=waf_logs | stats count() as Count by `httpSourceId` | sort - Count | head 5;
+    fetched rows / total rows = 5/5
+    +-------+---------------------------------+
+    | Count | httpSourceId                    |
+    |-------|---------------------------------|
+    | 1     | 784781757088:zn99vte24b:staging |
+    | 1     | 250922725343:06udlnzsuc:v2      |
+    | 1     | 712448542372:h11d127c1c:prod    |
+    | 1     | 915064614783:oudun2xjou:v1      |
+    | 1     | 782258924067:8xbvht9icb:dev     |
+    +-------+---------------------------------+
+
+Geographic Analysis
+===================
+
+Top Client IPs
+---------------
+
+Most active client IP addresses.
+
+PPL query::
+
+    os> source=waf_logs | stats count() as Count by `httpRequest.clientIp` | sort - Count | head 10;
+    fetched rows / total rows = 10/10
+    +-------+----------------------+
+    | Count | httpRequest.clientIp |
+    |-------|----------------------|
+    | 1     | 185.114.91.138       |
+    | 1     | 155.12.221.78        |
+    | 1     | 121.173.165.128      |
+    | 1     | 13.234.156.211       |
+    | 1     | 142.126.11.6         |
+    | ...   | ...                  |
+    +-------+----------------------+
+
+Top Countries
+-------------
+
+Request distribution by country of origin.
+
+PPL query::
+
+    os> source=waf_logs | stats count() as Count by `httpRequest.country` | sort - Count;
+    fetched rows / total rows = 25/25
+    +-------+---------------------+
+    | Count | httpRequest.country |
+    |-------|---------------------|
+    | 33    | US                  |
+    | 8     | GB                  |
+    | 7     | DE                  |
+    | 7     | BR                  |
+    | 6     | CA                  |
+    | 5     | RU                  |
+    | 3     | JP                  |
+    | 3     | IN                  |
+    | 3     | CN                  |
+    | 3     | BE                  |
+    | 2     | SG                  |
+    | 2     | SE                  |
+    | 2     | MX                  |
+    | 2     | IE                  |
+    | 2     | ES                  |
+    | 2     | CH                  |
+    | 2     | AU                  |
+    | 1     | ZA                  |
+    | 1     | PT                  |
+    | 1     | NL                  |
+    | 1     | IT                  |
+    | 1     | FR                  |
+    | 1     | FI                  |
+    | 1     | CL                  |
+    | 1     | AT                  |
+    +-------+---------------------+
+
+Rule Analysis
+=============
+
+Top Terminating Rules
+---------------------
+
+Most frequently triggered WAF rules.
+ +PPL query:: + + os> source=waf_logs | stats count() as Count by `terminatingRuleId` | sort - Count | head 10; + fetched rows / total rows = 10/10 + +-------+------------------------------------------+ + | Count | terminatingRuleId | + |-------|------------------------------------------| + | 13 | AWS-AWSManagedRulesAmazonIpReputationList | + | 11 | XSSProtectionRule | + | 11 | Default_Action | + | 10 | AWS-AWSManagedRulesKnownBadInputsRuleSet | + | 8 | CustomRateLimitRule | + | 8 | AWS-AWSManagedRulesCommonRuleSet | + | 7 | CustomIPWhitelistRule | + | 7 | AWS-AWSManagedRulesSQLiRuleSet | + | 7 | AWS-AWSManagedRulesLinuxRuleSet | + | 5 | CSRFProtectionRule | + +-------+------------------------------------------+ + +Total Blocked Requests +---------------------- + +Count of requests blocked by WAF rules. + +PPL query:: + + os> source=waf_logs | WHERE action = "BLOCK" | STATS count(); + fetched rows / total rows = 1/1 + +----------+ + | count() | + |----------| + | 21 | + +----------+ + +URI Analysis +============ + +Top Request URIs +---------------- + +Most frequently requested URI paths. + +PPL query:: + + os> source=waf_logs | stats count() as Count by `httpRequest.uri` | sort - Count | head 10; + fetched rows / total rows = 10/10 + +-------+------------------+ + | Count | httpRequest.uri | + |-------|------------------| + | 5 | /api/v2/search | + | 5 | /account | + | 4 | /products | + | 4 | /css/style.css | + | 3 | /test | + | 3 | /download | + | 3 | /docs | + | 3 | /billing | + | 3 | /api/v2/users | + | 2 | /about | + +-------+------------------+ \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/cloudtrail_logs.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/cloudtrail_logs.json new file mode 100644 index 00000000000..5b5acc3486c --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/cloudtrail_logs.json @@ -0,0 +1,200 @@ +{"index": {"_id": "1"}} +{"start_time": "2025-08-21T10:41:56.416081+0000", "awsRegion": "ap-south-1", "sourceIPAddress": "116.142.58.92", "eventSource": "logs.amazonaws.com", "eventName": "DescribeMetricFilters", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "598715677952", "userIdentity.sessionContext.sessionIssuer.userName": "DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::598715677952:role/DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "2"}} +{"start_time": "2025-07-25T21:56:18.416081+0000", "awsRegion": "eu-north-1", "sourceIPAddress": "events.amazonaws.com", "eventSource": "dynamodb.amazonaws.com", "eventName": "DescribeTable", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AWSService", "userIdentity.accountId": "", "userIdentity.sessionContext.sessionIssuer.userName": "", "userIdentity.sessionContext.sessionIssuer.arn": "", "userIdentity.sessionContext.sessionIssuer.type": ""} +{"index": {"_id": "3"}} +{"start_time": "2025-08-24T01:17:14.416081+0000", "awsRegion": "eu-central-1", "sourceIPAddress": "140.38.65.165", "eventSource": "s3.amazonaws.com", "eventName": "CreateBucket", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "287645373404", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::287645373404:role/SecurityTeam", 
"userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "4"}} +{"start_time": "2025-07-11T22:22:53.416081+0000", "awsRegion": "sa-east-1", "sourceIPAddress": "directquery.opensearchservice.amazonaws.com", "eventSource": "cloudwatch.amazonaws.com", "eventName": "DeleteAlarm", "eventCategory": "Management", "event_count": 5, "userIdentity.type": "AWSService", "userIdentity.accountId": "210622981215", "userIdentity.sessionContext.sessionIssuer.userName": "cloudwatch-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::210622981215:role/cloudwatch-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "5"}} +{"start_time": "2025-06-26T10:51:39.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "58.138.87.219", "eventSource": "sts.amazonaws.com", "eventName": "AssumeRole", "eventCategory": "Management", "event_count": 6, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "343123305904", "userIdentity.sessionContext.sessionIssuer.userName": "AppDeveloper", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::343123305904:role/AppDeveloper", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "6"}} +{"start_time": "2025-06-21T15:15:59.416081+0000", "awsRegion": "ap-southeast-2", "sourceIPAddress": "180.3.121.23", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeInstances", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "Root", "userIdentity.accountId": "774538043323", "userIdentity.sessionContext.sessionIssuer.userName": "AppDeveloper", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::774538043323:role/AppDeveloper", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "7"}} +{"start_time": "2025-09-28T19:41:45.416081+0000", "awsRegion": "ap-south-1", "sourceIPAddress": "207.28.12.237", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeSecurityGroups", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "Root", "userIdentity.accountId": "190225759807", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::190225759807:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "8"}} +{"start_time": "2025-09-30T07:52:11.416081+0000", "awsRegion": "us-east-2", "sourceIPAddress": "directquery.opensearchservice.amazonaws.com", "eventSource": "sts.amazonaws.com", "eventName": "GetCallerIdentity", "eventCategory": "Management", "event_count": 4, "userIdentity.type": "AWSService", "userIdentity.accountId": "999658550876", "userIdentity.sessionContext.sessionIssuer.userName": "sts-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::999658550876:role/sts-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "9"}} +{"start_time": "2025-06-08T06:53:51.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "monitoring.amazonaws.com", "eventSource": "logs.amazonaws.com", "eventName": "DescribeMetricFilters", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "837288668719", "userIdentity.sessionContext.sessionIssuer.userName": "logs-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::837288668719:role/logs-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "10"}} 
+{"start_time": "2025-06-07T00:10:41.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "210.84.80.238", "eventSource": "cloudformation.amazonaws.com", "eventName": "ValidateTemplate", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "689811372963", "userIdentity.sessionContext.sessionIssuer.userName": "Operator", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::689811372963:role/Operator", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "11"}} +{"start_time": "2025-05-09T07:26:04.416081+0000", "awsRegion": "eu-central-1", "sourceIPAddress": "222.105.156.190", "eventSource": "rds.amazonaws.com", "eventName": "DescribeDBClusters", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "585894403030", "userIdentity.sessionContext.sessionIssuer.userName": "SystemAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::585894403030:role/SystemAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "12"}} +{"start_time": "2025-05-12T07:08:05.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "104.201.253.14", "eventSource": "lambda.amazonaws.com", "eventName": "UpdateFunctionCode", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "369809605951", "userIdentity.sessionContext.sessionIssuer.userName": "IAMManager", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::369809605951:role/IAMManager", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "13"}} +{"start_time": "2025-09-23T22:57:46.416081+0000", "awsRegion": "ap-south-1", "sourceIPAddress": "207.220.131.48", "eventSource": "rds.amazonaws.com", "eventName": "CreateDBCluster", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "390835871147", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::390835871147:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "14"}} +{"start_time": "2025-08-19T07:12:33.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "events.amazonaws.com", "eventSource": "cloudformation.amazonaws.com", "eventName": "CreateStack", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AWSService", "userIdentity.accountId": "688034119877", "userIdentity.sessionContext.sessionIssuer.userName": "cloudformation-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::688034119877:role/cloudformation-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "15"}} +{"start_time": "2025-09-07T01:54:52.416081+0000", "awsRegion": "eu-west-2", "sourceIPAddress": "monitoring.amazonaws.com", "eventSource": "lambda.amazonaws.com", "eventName": "InvokeFunction", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AWSService", "userIdentity.accountId": "694176454116", "userIdentity.sessionContext.sessionIssuer.userName": "lambda-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::694176454116:role/lambda-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "16"}} +{"start_time": "2025-09-20T10:06:32.416081+0000", "awsRegion": "us-east-2", "sourceIPAddress": 
"190.240.94.208", "eventSource": "cloudwatch.amazonaws.com", "eventName": "ListMetrics", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "129777904916", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::129777904916:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "17"}} +{"start_time": "2025-06-15T22:31:16.416081+0000", "awsRegion": "us-east-2", "sourceIPAddress": "20.51.115.73", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeInstances", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "635086364222", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::635086364222:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "18"}} +{"start_time": "2025-05-21T08:53:04.416081+0000", "awsRegion": "eu-west-2", "sourceIPAddress": "129.74.71.250", "eventSource": "iam.amazonaws.com", "eventName": "ListUsers", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "957340598755", "userIdentity.sessionContext.sessionIssuer.userName": "IAMManager", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::957340598755:role/IAMManager", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "19"}} +{"start_time": "2025-06-22T00:44:02.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "64.21.1.111", "eventSource": "cloudwatch.amazonaws.com", "eventName": "PutMetricData", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "101369192494", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::101369192494:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "20"}} +{"start_time": "2025-10-20T08:31:28.416081+0000", "awsRegion": "us-east-1", "sourceIPAddress": "112.60.8.202", "eventSource": "rds.amazonaws.com", "eventName": "DescribeDBInstances", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "707538569947", "userIdentity.sessionContext.sessionIssuer.userName": "IAMManager", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::707538569947:role/IAMManager", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "21"}} +{"start_time": "2025-06-18T03:50:40.416081+0000", "awsRegion": "ap-south-1", "sourceIPAddress": "200.96.40.13", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeImages", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "194701000040", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::194701000040:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "22"}} +{"start_time": "2025-05-26T11:47:22.416081+0000", "awsRegion": "eu-central-1", "sourceIPAddress": "211.155.90.204", "eventSource": "logs.amazonaws.com", "eventName": "CreateLogStream", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": 
"969780388008", "userIdentity.sessionContext.sessionIssuer.userName": "NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::969780388008:role/NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "23"}} +{"start_time": "2025-10-29T01:31:22.416081+0000", "awsRegion": "eu-west-1", "sourceIPAddress": "165.92.245.184", "eventSource": "s3.amazonaws.com", "eventName": "GetObject", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "582431165658", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::582431165658:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "24"}} +{"start_time": "2025-09-24T16:54:04.416081+0000", "awsRegion": "us-east-1", "sourceIPAddress": "200.213.240.187", "eventSource": "iam.amazonaws.com", "eventName": "GetUser", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "731138940743", "userIdentity.sessionContext.sessionIssuer.userName": "ServiceAccount", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::731138940743:role/ServiceAccount", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "25"}} +{"start_time": "2025-09-29T10:03:12.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "209.52.135.59", "eventSource": "s3.amazonaws.com", "eventName": "PutObject", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "320734496850", "userIdentity.sessionContext.sessionIssuer.userName": "NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::320734496850:role/NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "26"}} +{"start_time": "2025-07-21T00:00:52.416081+0000", "awsRegion": "sa-east-1", "sourceIPAddress": "208.163.194.161", "eventSource": "rds.amazonaws.com", "eventName": "DeleteDBInstance", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "139248107427", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::139248107427:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "27"}} +{"start_time": "2025-08-18T10:32:39.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "218.57.101.219", "eventSource": "lambda.amazonaws.com", "eventName": "InvokeFunction", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "665143597263", "userIdentity.sessionContext.sessionIssuer.userName": "SystemAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::665143597263:role/SystemAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "28"}} +{"start_time": "2025-10-24T21:03:23.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "111.48.148.225", "eventSource": "logs.amazonaws.com", "eventName": "DescribeLogGroups", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "780949047078", "userIdentity.sessionContext.sessionIssuer.userName": "DevOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::780949047078:role/DevOps", 
"userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "29"}} +{"start_time": "2025-05-26T23:08:31.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "216.101.61.86", "eventSource": "s3.amazonaws.com", "eventName": "ListBucket", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "824299598130", "userIdentity.sessionContext.sessionIssuer.userName": "ServiceAccount", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::824299598130:role/ServiceAccount", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "30"}} +{"start_time": "2025-08-20T16:59:39.416081+0000", "awsRegion": "us-east-2", "sourceIPAddress": "138.49.85.79", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeSecurityGroups", "eventCategory": "Management", "event_count": 10, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "848792385237", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::848792385237:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "31"}} +{"start_time": "2025-10-12T04:06:50.416081+0000", "awsRegion": "eu-north-1", "sourceIPAddress": "187.42.164.190", "eventSource": "s3.amazonaws.com", "eventName": "DeleteObject", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "724139824382", "userIdentity.sessionContext.sessionIssuer.userName": "AppDeveloper", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::724139824382:role/AppDeveloper", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "32"}} +{"start_time": "2025-08-28T19:21:54.416081+0000", "awsRegion": "sa-east-1", "sourceIPAddress": "lambda.amazonaws.com", "eventSource": "rds.amazonaws.com", "eventName": "ModifyDBInstance", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "", "userIdentity.sessionContext.sessionIssuer.userName": "", "userIdentity.sessionContext.sessionIssuer.arn": "", "userIdentity.sessionContext.sessionIssuer.type": ""} +{"index": {"_id": "33"}} +{"start_time": "2025-08-18T03:44:47.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "213.35.129.121", "eventSource": "iam.amazonaws.com", "eventName": "ListRoles", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "302115356855", "userIdentity.sessionContext.sessionIssuer.userName": "SystemAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::302115356855:role/SystemAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "34"}} +{"start_time": "2025-10-13T00:37:16.416081+0000", "awsRegion": "sa-east-1", "sourceIPAddress": "194.248.127.234", "eventSource": "cloudwatch.amazonaws.com", "eventName": "PutMetricData", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "SAMLUser", "userIdentity.accountId": "704865030236", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::704865030236:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "35"}} +{"start_time": "2025-05-21T09:24:16.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "131.241.74.136", "eventSource": "logs.amazonaws.com", "eventName": "DeleteLogGroup", 
"eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "754940854222", "userIdentity.sessionContext.sessionIssuer.userName": "Developer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::754940854222:role/Developer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "36"}} +{"start_time": "2025-09-20T16:19:34.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "202.193.105.185", "eventSource": "s3.amazonaws.com", "eventName": "CreateBucket", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "104443609816", "userIdentity.sessionContext.sessionIssuer.userName": "s3-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::104443609816:role/s3-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "37"}} +{"start_time": "2025-08-02T12:00:53.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "93.71.143.100", "eventSource": "cloudformation.amazonaws.com", "eventName": "DeleteStack", "eventCategory": "Management", "event_count": 6, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "981549367019", "userIdentity.sessionContext.sessionIssuer.userName": "Admin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::981549367019:role/Admin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "38"}} +{"start_time": "2025-06-26T13:22:30.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "196.184.128.200", "eventSource": "ec2.amazonaws.com", "eventName": "RunInstances", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "", "userIdentity.sessionContext.sessionIssuer.userName": "", "userIdentity.sessionContext.sessionIssuer.arn": "", "userIdentity.sessionContext.sessionIssuer.type": ""} +{"index": {"_id": "39"}} +{"start_time": "2025-05-19T18:20:54.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "189.205.235.121", "eventSource": "logs.amazonaws.com", "eventName": "CreateLogGroup", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "810212098173", "userIdentity.sessionContext.sessionIssuer.userName": "Analyst", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::810212098173:role/Analyst", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "40"}} +{"start_time": "2025-09-15T15:06:56.416081+0000", "awsRegion": "us-east-2", "sourceIPAddress": "28.21.190.200", "eventSource": "s3.amazonaws.com", "eventName": "CreateBucket", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "222901905268", "userIdentity.sessionContext.sessionIssuer.userName": "DevOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::222901905268:role/DevOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "41"}} +{"start_time": "2025-05-23T04:02:07.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "214.252.35.45", "eventSource": "ec2.amazonaws.com", "eventName": "CreateSecurityGroup", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "961914318249", "userIdentity.sessionContext.sessionIssuer.userName": "ec2-service", "userIdentity.sessionContext.sessionIssuer.arn": 
"arn:aws:iam::961914318249:role/ec2-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "42"}} +{"start_time": "2025-10-06T19:44:08.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "events.amazonaws.com", "eventSource": "lambda.amazonaws.com", "eventName": "CreateFunction", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AWSService", "userIdentity.accountId": "645341622297", "userIdentity.sessionContext.sessionIssuer.userName": "lambda-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::645341622297:role/lambda-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "43"}} +{"start_time": "2025-07-09T14:20:13.416081+0000", "awsRegion": "ap-southeast-2", "sourceIPAddress": "38.13.198.220", "eventSource": "cloudwatch.amazonaws.com", "eventName": "DeleteAlarm", "eventCategory": "Management", "event_count": 4, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "956578812116", "userIdentity.sessionContext.sessionIssuer.userName": "DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::956578812116:role/DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "44"}} +{"start_time": "2025-09-09T00:18:58.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "207.179.104.251", "eventSource": "rds.amazonaws.com", "eventName": "ModifyDBInstance", "eventCategory": "Management", "event_count": 6, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "425211551275", "userIdentity.sessionContext.sessionIssuer.userName": "AppDeveloper", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::425211551275:role/AppDeveloper", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "45"}} +{"start_time": "2025-06-27T13:16:48.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "192.220.137.197", "eventSource": "sts.amazonaws.com", "eventName": "DecodeAuthorizationMessage", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "577713527093", "userIdentity.sessionContext.sessionIssuer.userName": "CloudOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::577713527093:role/CloudOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "46"}} +{"start_time": "2025-05-27T04:41:11.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "180.192.1.128", "eventSource": "dynamodb.amazonaws.com", "eventName": "GetItem", "eventCategory": "Management", "event_count": 4, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "123105719850", "userIdentity.sessionContext.sessionIssuer.userName": "NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::123105719850:role/NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "47"}} +{"start_time": "2025-10-19T12:18:26.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "193.61.101.35", "eventSource": "lambda.amazonaws.com", "eventName": "GetFunction", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "364182939613", "userIdentity.sessionContext.sessionIssuer.userName": "CloudOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::364182939613:role/CloudOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "48"}} +{"start_time": 
"2025-08-24T07:28:57.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "193.141.133.115", "eventSource": "s3.amazonaws.com", "eventName": "DeleteObject", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "469189613276", "userIdentity.sessionContext.sessionIssuer.userName": "Admin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::469189613276:role/Admin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "49"}} +{"start_time": "2025-09-04T00:17:31.416081+0000", "awsRegion": "us-east-2", "sourceIPAddress": "222.107.249.20", "eventSource": "lambda.amazonaws.com", "eventName": "DeleteFunction", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "360608615163", "userIdentity.sessionContext.sessionIssuer.userName": "DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::360608615163:role/DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "50"}} +{"start_time": "2025-10-23T07:44:47.416081+0000", "awsRegion": "eu-north-1", "sourceIPAddress": "s3.amazonaws.com", "eventSource": "sts.amazonaws.com", "eventName": "GetCallerIdentity", "eventCategory": "Management", "event_count": 6, "userIdentity.type": "AWSService", "userIdentity.accountId": "774442148865", "userIdentity.sessionContext.sessionIssuer.userName": "sts-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::774442148865:role/sts-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "51"}} +{"start_time": "2025-07-21T15:14:59.416081+0000", "awsRegion": "us-east-1", "sourceIPAddress": "cloudwatch.amazonaws.com", "eventSource": "sts.amazonaws.com", "eventName": "GetCallerIdentity", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AWSService", "userIdentity.accountId": "942584532060", "userIdentity.sessionContext.sessionIssuer.userName": "sts-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::942584532060:role/sts-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "52"}} +{"start_time": "2025-05-31T06:42:25.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "146.172.80.16", "eventSource": "dynamodb.amazonaws.com", "eventName": "PutItem", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "IAMUser", "userIdentity.accountId": "507772163712", "userIdentity.sessionContext.sessionIssuer.userName": "CloudOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::507772163712:role/CloudOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "53"}} +{"start_time": "2025-09-06T18:33:24.416081+0000", "awsRegion": "eu-west-2", "sourceIPAddress": "203.211.64.233", "eventSource": "cloudwatch.amazonaws.com", "eventName": "ListMetrics", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "607811186301", "userIdentity.sessionContext.sessionIssuer.userName": "DevOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::607811186301:role/DevOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "54"}} +{"start_time": "2025-07-06T19:30:14.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "128.215.181.68", "eventSource": "cloudformation.amazonaws.com", "eventName": "DescribeStacks", "eventCategory": 
"Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "434364154193", "userIdentity.sessionContext.sessionIssuer.userName": "NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::434364154193:role/NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "55"}} +{"start_time": "2025-09-16T13:21:55.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "s3.amazonaws.com", "eventSource": "cloudwatch.amazonaws.com", "eventName": "CreateAlarm", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "974366579111", "userIdentity.sessionContext.sessionIssuer.userName": "cloudwatch-service", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::974366579111:role/cloudwatch-service-role", "userIdentity.sessionContext.sessionIssuer.type": "ServiceRole"} +{"index": {"_id": "56"}} +{"start_time": "2025-10-21T07:09:47.416081+0000", "awsRegion": "ap-south-1", "sourceIPAddress": "lambda.amazonaws.com", "eventSource": "cloudformation.amazonaws.com", "eventName": "ValidateTemplate", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "", "userIdentity.sessionContext.sessionIssuer.userName": "", "userIdentity.sessionContext.sessionIssuer.arn": "", "userIdentity.sessionContext.sessionIssuer.type": ""} +{"index": {"_id": "57"}} +{"start_time": "2025-09-09T14:11:05.416081+0000", "awsRegion": "eu-north-1", "sourceIPAddress": "cloudwatch.amazonaws.com", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeImages", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "", "userIdentity.sessionContext.sessionIssuer.userName": "", "userIdentity.sessionContext.sessionIssuer.arn": "", "userIdentity.sessionContext.sessionIssuer.type": ""} +{"index": {"_id": "58"}} +{"start_time": "2025-07-04T20:06:01.416081+0000", "awsRegion": "sa-east-1", "sourceIPAddress": "221.48.241.206", "eventSource": "rds.amazonaws.com", "eventName": "DeleteDBInstance", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "176954084253", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::176954084253:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "59"}} +{"start_time": "2025-08-29T01:10:37.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "137.238.36.73", "eventSource": "dynamodb.amazonaws.com", "eventName": "DeleteTable", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "755690094115", "userIdentity.sessionContext.sessionIssuer.userName": "ServiceAccount", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::755690094115:role/ServiceAccount", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "60"}} +{"start_time": "2025-08-14T17:12:21.416081+0000", "awsRegion": "us-east-1", "sourceIPAddress": "cloudformation.amazonaws.com", "eventSource": "iam.amazonaws.com", "eventName": "CreateUser", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "", "userIdentity.sessionContext.sessionIssuer.userName": "", "userIdentity.sessionContext.sessionIssuer.arn": "", "userIdentity.sessionContext.sessionIssuer.type": ""} 
+{"index": {"_id": "61"}} +{"start_time": "2025-10-12T06:31:48.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "88.176.225.163", "eventSource": "rds.amazonaws.com", "eventName": "CreateDBCluster", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "541471480925", "userIdentity.sessionContext.sessionIssuer.userName": "Developer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::541471480925:role/Developer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "62"}} +{"start_time": "2025-06-05T13:03:51.416081+0000", "awsRegion": "ap-southeast-2", "sourceIPAddress": "197.116.253.82", "eventSource": "sts.amazonaws.com", "eventName": "GetCallerIdentity", "eventCategory": "Management", "event_count": 5, "userIdentity.type": "IAMUser", "userIdentity.accountId": "763171614417", "userIdentity.sessionContext.sessionIssuer.userName": "CloudOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::763171614417:role/CloudOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "63"}} +{"start_time": "2025-07-11T18:05:24.416081+0000", "awsRegion": "eu-north-1", "sourceIPAddress": "159.194.69.16", "eventSource": "iam.amazonaws.com", "eventName": "ListRoles", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "108752924584", "userIdentity.sessionContext.sessionIssuer.userName": "NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::108752924584:role/NetworkAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "64"}} +{"start_time": "2025-07-30T03:04:20.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "220.188.96.122", "eventSource": "dynamodb.amazonaws.com", "eventName": "GetItem", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AWSService", "userIdentity.accountId": "", "userIdentity.sessionContext.sessionIssuer.userName": "", "userIdentity.sessionContext.sessionIssuer.arn": "", "userIdentity.sessionContext.sessionIssuer.type": ""} +{"index": {"_id": "65"}} +{"start_time": "2025-06-13T02:15:32.416081+0000", "awsRegion": "sa-east-1", "sourceIPAddress": "217.125.68.155", "eventSource": "cloudwatch.amazonaws.com", "eventName": "CreateAlarm", "eventCategory": "Management", "event_count": 4, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "962191889410", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::962191889410:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "66"}} +{"start_time": "2025-06-22T05:55:05.416081+0000", "awsRegion": "eu-north-1", "sourceIPAddress": "94.172.83.93", "eventSource": "dynamodb.amazonaws.com", "eventName": "PutItem", "eventCategory": "Management", "event_count": 6, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "906277557614", "userIdentity.sessionContext.sessionIssuer.userName": "DevOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::906277557614:role/DevOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "67"}} +{"start_time": "2025-05-15T09:30:30.416081+0000", "awsRegion": "ap-south-1", "sourceIPAddress": "54.14.58.162", "eventSource": "ec2.amazonaws.com", "eventName": "CreateSecurityGroup", "eventCategory": "Management", "event_count": 4, "userIdentity.type": "AssumedRole", 
"userIdentity.accountId": "357011035635", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::357011035635:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "68"}} +{"start_time": "2025-09-07T08:18:22.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "214.5.176.55", "eventSource": "s3.amazonaws.com", "eventName": "GetObject", "eventCategory": "Management", "event_count": 10, "userIdentity.type": "Root", "userIdentity.accountId": "648490865789", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::648490865789:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "69"}} +{"start_time": "2025-09-16T17:45:04.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "219.10.197.244", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeImages", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "Root", "userIdentity.accountId": "285431994062", "userIdentity.sessionContext.sessionIssuer.userName": "Operator", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::285431994062:role/Operator", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "70"}} +{"start_time": "2025-07-06T10:35:46.416081+0000", "awsRegion": "eu-west-1", "sourceIPAddress": "203.34.0.161", "eventSource": "dynamodb.amazonaws.com", "eventName": "Query", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "638335633292", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::638335633292:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "71"}} +{"start_time": "2025-05-27T07:18:01.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "111.82.52.20", "eventSource": "cloudwatch.amazonaws.com", "eventName": "PutMetricData", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "244978908806", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::244978908806:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "72"}} +{"start_time": "2025-06-03T08:36:25.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "213.183.2.101", "eventSource": "sts.amazonaws.com", "eventName": "AssumeRoleWithWebIdentity", "eventCategory": "Management", "event_count": 6, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "333360534910", "userIdentity.sessionContext.sessionIssuer.userName": "IAMManager", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::333360534910:role/IAMManager", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "73"}} +{"start_time": "2025-09-16T22:42:17.416081+0000", "awsRegion": "eu-central-1", "sourceIPAddress": "222.167.230.206", "eventSource": "rds.amazonaws.com", "eventName": "DeleteDBInstance", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "Root", "userIdentity.accountId": "513684053971", "userIdentity.sessionContext.sessionIssuer.userName": "Admin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::513684053971:role/Admin", 
"userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "74"}} +{"start_time": "2025-10-10T05:29:41.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "167.99.84.23", "eventSource": "dynamodb.amazonaws.com", "eventName": "PutItem", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "335693333387", "userIdentity.sessionContext.sessionIssuer.userName": "CloudOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::335693333387:role/CloudOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "75"}} +{"start_time": "2025-05-27T20:53:26.416081+0000", "awsRegion": "sa-east-1", "sourceIPAddress": "206.98.239.164", "eventSource": "cloudformation.amazonaws.com", "eventName": "CreateStack", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "Root", "userIdentity.accountId": "976086506951", "userIdentity.sessionContext.sessionIssuer.userName": "ServiceAccount", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::976086506951:role/ServiceAccount", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "76"}} +{"start_time": "2025-05-21T10:32:52.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "177.203.238.142", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeSecurityGroups", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "Root", "userIdentity.accountId": "334083459519", "userIdentity.sessionContext.sessionIssuer.userName": "Operator", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::334083459519:role/Operator", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "77"}} +{"start_time": "2025-06-22T04:32:04.416081+0000", "awsRegion": "ap-southeast-2", "sourceIPAddress": "74.173.179.125", "eventSource": "s3.amazonaws.com", "eventName": "DeleteObject", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "395453110579", "userIdentity.sessionContext.sessionIssuer.userName": "ServiceAccount", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::395453110579:role/ServiceAccount", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "78"}} +{"start_time": "2025-10-10T01:56:08.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "150.37.200.17", "eventSource": "s3.amazonaws.com", "eventName": "ListBucket", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "Root", "userIdentity.accountId": "453723642994", "userIdentity.sessionContext.sessionIssuer.userName": "Developer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::453723642994:role/Developer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "79"}} +{"start_time": "2025-05-16T23:00:28.416081+0000", "awsRegion": "ap-south-1", "sourceIPAddress": "196.7.118.130", "eventSource": "cloudformation.amazonaws.com", "eventName": "UpdateStack", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "Root", "userIdentity.accountId": "196141700677", "userIdentity.sessionContext.sessionIssuer.userName": "SystemAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::196141700677:role/SystemAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "80"}} +{"start_time": "2025-09-11T16:28:00.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "23.97.139.102", "eventSource": 
"logs.amazonaws.com", "eventName": "CreateLogStream", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "796293792090", "userIdentity.sessionContext.sessionIssuer.userName": "Developer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::796293792090:role/Developer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "81"}} +{"start_time": "2025-07-02T04:04:47.416081+0000", "awsRegion": "ap-southeast-2", "sourceIPAddress": "69.161.196.94", "eventSource": "rds.amazonaws.com", "eventName": "DescribeDBClusters", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "354486179284", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::354486179284:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "82"}} +{"start_time": "2025-05-20T17:53:28.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "167.216.134.139", "eventSource": "dynamodb.amazonaws.com", "eventName": "GetItem", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "867655343496", "userIdentity.sessionContext.sessionIssuer.userName": "DevOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::867655343496:role/DevOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "83"}} +{"start_time": "2025-06-05T00:50:13.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "196.108.51.12", "eventSource": "iam.amazonaws.com", "eventName": "CreateUser", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "214855083663", "userIdentity.sessionContext.sessionIssuer.userName": "SystemAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::214855083663:role/SystemAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "84"}} +{"start_time": "2025-06-20T08:36:51.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "168.59.87.29", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeImages", "eventCategory": "Management", "event_count": 4, "userIdentity.type": "Root", "userIdentity.accountId": "320011753868", "userIdentity.sessionContext.sessionIssuer.userName": "DevOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::320011753868:role/DevOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "85"}} +{"start_time": "2025-07-20T08:25:37.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "39.133.4.149", "eventSource": "sts.amazonaws.com", "eventName": "GetSessionToken", "eventCategory": "Management", "event_count": 5, "userIdentity.type": "Root", "userIdentity.accountId": "879473474150", "userIdentity.sessionContext.sessionIssuer.userName": "CloudOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::879473474150:role/CloudOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "86"}} +{"start_time": "2025-08-06T21:41:57.416081+0000", "awsRegion": "eu-north-1", "sourceIPAddress": "179.127.44.147", "eventSource": "lambda.amazonaws.com", "eventName": "InvokeFunction", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "824961969430", "userIdentity.sessionContext.sessionIssuer.userName": 
"AppDeveloper", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::824961969430:role/AppDeveloper", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "87"}} +{"start_time": "2025-05-25T09:32:41.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "166.110.103.106", "eventSource": "s3.amazonaws.com", "eventName": "PutObject", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "747800232175", "userIdentity.sessionContext.sessionIssuer.userName": "Admin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::747800232175:role/Admin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "88"}} +{"start_time": "2025-07-08T06:15:31.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "213.10.201.166", "eventSource": "s3.amazonaws.com", "eventName": "GetObject", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "413425936817", "userIdentity.sessionContext.sessionIssuer.userName": "SystemAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::413425936817:role/SystemAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "89"}} +{"start_time": "2025-07-21T02:57:04.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "173.36.144.235", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeImages", "eventCategory": "Management", "event_count": 6, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "603528065357", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::603528065357:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "90"}} +{"start_time": "2025-10-11T12:13:27.416081+0000", "awsRegion": "ap-northeast-1", "sourceIPAddress": "82.141.122.180", "eventSource": "lambda.amazonaws.com", "eventName": "InvokeFunction", "eventCategory": "Management", "event_count": 3, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "616451695486", "userIdentity.sessionContext.sessionIssuer.userName": "DataEngineer", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::616451695486:role/DataEngineer", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "91"}} +{"start_time": "2025-09-12T17:02:22.416081+0000", "awsRegion": "eu-west-1", "sourceIPAddress": "207.116.155.66", "eventSource": "rds.amazonaws.com", "eventName": "DescribeDBInstances", "eventCategory": "Management", "event_count": 2, "userIdentity.type": "Root", "userIdentity.accountId": "535186375991", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::535186375991:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "92"}} +{"start_time": "2025-05-06T06:56:03.416081+0000", "awsRegion": "us-west-2", "sourceIPAddress": "195.38.135.90", "eventSource": "dynamodb.amazonaws.com", "eventName": "GetItem", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "718159382250", "userIdentity.sessionContext.sessionIssuer.userName": "SecurityTeam", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::718159382250:role/SecurityTeam", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "93"}} +{"start_time": 
"2025-08-13T21:10:23.416081+0000", "awsRegion": "ca-central-1", "sourceIPAddress": "133.163.135.128", "eventSource": "iam.amazonaws.com", "eventName": "GetUser", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "127700845301", "userIdentity.sessionContext.sessionIssuer.userName": "DevOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::127700845301:role/DevOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "94"}} +{"start_time": "2025-09-05T19:29:52.416081+0000", "awsRegion": "eu-west-1", "sourceIPAddress": "179.105.4.100", "eventSource": "rds.amazonaws.com", "eventName": "ModifyDBInstance", "eventCategory": "Management", "event_count": 5, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "380381714172", "userIdentity.sessionContext.sessionIssuer.userName": "CloudOps", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::380381714172:role/CloudOps", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "95"}} +{"start_time": "2025-09-27T01:50:21.416081+0000", "awsRegion": "ap-southeast-2", "sourceIPAddress": "99.124.29.90", "eventSource": "s3.amazonaws.com", "eventName": "CreateBucket", "eventCategory": "Management", "event_count": 5, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "177098224866", "userIdentity.sessionContext.sessionIssuer.userName": "ServiceAccount", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::177098224866:role/ServiceAccount", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "96"}} +{"start_time": "2025-10-29T14:13:48.416081+0000", "awsRegion": "eu-west-1", "sourceIPAddress": "46.88.166.22", "eventSource": "cloudformation.amazonaws.com", "eventName": "CreateStack", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "IAMUser", "userIdentity.accountId": "473285541798", "userIdentity.sessionContext.sessionIssuer.userName": "IAMManager", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::473285541798:role/IAMManager", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "97"}} +{"start_time": "2025-07-15T23:00:22.416081+0000", "awsRegion": "us-east-2", "sourceIPAddress": "212.210.83.192", "eventSource": "iam.amazonaws.com", "eventName": "DeleteRole", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "427138684616", "userIdentity.sessionContext.sessionIssuer.userName": "Analyst", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::427138684616:role/Analyst", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "98"}} +{"start_time": "2025-06-13T20:40:17.416081+0000", "awsRegion": "eu-west-2", "sourceIPAddress": "152.13.36.25", "eventSource": "ec2.amazonaws.com", "eventName": "DescribeSecurityGroups", "eventCategory": "Management", "event_count": 1, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "507268228830", "userIdentity.sessionContext.sessionIssuer.userName": "Analyst", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::507268228830:role/Analyst", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "99"}} +{"start_time": "2025-06-14T05:46:08.416081+0000", "awsRegion": "us-west-1", "sourceIPAddress": "11.167.99.128", "eventSource": "rds.amazonaws.com", "eventName": "CreateDBCluster", "eventCategory": "Management", "event_count": 1, "userIdentity.type": 
"AssumedRole", "userIdentity.accountId": "685389690394", "userIdentity.sessionContext.sessionIssuer.userName": "DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::685389690394:role/DatabaseAdmin", "userIdentity.sessionContext.sessionIssuer.type": "Role"} +{"index": {"_id": "100"}} +{"start_time": "2025-09-23T22:55:31.416081+0000", "awsRegion": "ap-southeast-1", "sourceIPAddress": "70.225.112.188", "eventSource": "ec2.amazonaws.com", "eventName": "TerminateInstances", "eventCategory": "Management", "event_count": 5, "userIdentity.type": "AssumedRole", "userIdentity.accountId": "444563582819", "userIdentity.sessionContext.sessionIssuer.userName": "Analyst", "userIdentity.sessionContext.sessionIssuer.arn": "arn:aws:iam::444563582819:role/Analyst", "userIdentity.sessionContext.sessionIssuer.type": "Role"} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/nfw_logs.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/nfw_logs.json new file mode 100644 index 00000000000..08c619b2c97 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/nfw_logs.json @@ -0,0 +1,200 @@ +{"index":{"_id":"1"}} +{"firewall_name":"use1-fw","availability_zone":"us-east-1a","event_timestamp":"1740408108","event":{"tcp":{"tcp_flags":"13","syn":true,"fin":true,"ack":true},"app_proto":"unknown","src_ip":"10.170.18.235","src_port":60448,"netflow":{"pkts":21,"bytes":1208,"start":"2025-02-24T14:39:44.200427+0000","end":"2025-02-24T14:40:47.636922+0000","age":63,"min_ttl":63,"max_ttl":63},"event_type":"netflow","flow_id":16402963969408,"dest_ip":"8.8.8.8","proto":"TCP","dest_port":443,"timestamp":"2025-02-24T14:41:48.404578+0000"}} +{"index":{"_id":"2"}} +{"firewall_name":"use2-fw","availability_zone":"us-east-1a","event_timestamp":"1740407457","event":{"app_proto":"tls","src_ip":"10.170.18.235","src_port":36434,"event_type":"alert","alert":{"severity":3,"signature_id":4,"rev":0,"signature":"aws:alert_established action","action":"blocked","category":""},"flow_id":328474246651493,"dest_ip":"54.146.42.172","proto":"TCP","verdict":{"action":"drop"},"tls":{"sni":"checkip.amazonaws.com","version":"UNDETERMINED"},"dest_port":443,"pkt_src":"geneve encapsulation","timestamp":"2025-02-24T14:30:57.933410+0000","direction":"to_server"}} +{"index":{"_id":"3"}} +{"firewall_name":"use3-fw","availability_zone":"us-east-1a","event_timestamp":"1740407564","event":{"tcp":{"tcp_flags":"17","syn":true,"fin":true,"rst":true,"ack":true},"app_proto":"http","src_ip":"54.242.115.112","src_port":80,"netflow":{"pkts":11,"bytes":568,"start":"2025-02-24T14:30:48.720252+0000","end":"2025-02-24T14:31:42.291293+0000","age":54,"min_ttl":49,"max_ttl":252},"event_type":"netflow","flow_id":278710129222792,"dest_ip":"10.170.18.235","proto":"TCP","dest_port":59336,"timestamp":"2025-02-24T14:32:44.925034+0000"}} +{"index":{"_id":"4"}} +{"firewall_name":"use4-fw","availability_zone":"us-east-1a","event_timestamp":"1740408108","event":{"tcp":{"tcp_flags":"13","syn":true,"fin":true,"ack":true},"app_proto":"unknown","src_ip":"8.8.8.8","src_port":443,"netflow":{"pkts":11,"bytes":580,"start":"2025-02-24T14:39:44.200427+0000","end":"2025-02-24T14:40:47.636922+0000","age":63,"min_ttl":117,"max_ttl":248},"event_type":"netflow","flow_id":16402963969408,"dest_ip":"10.170.18.235","proto":"TCP","dest_port":60448,"timestamp":"2025-02-24T14:41:48.404721+0000"}} +{"index":{"_id":"5"}} 
+{"firewall_name":"use5-fw","availability_zone":"us-east-1a","event_timestamp":"1740407448","event":{"app_proto":"http","src_ip":"10.170.18.235","src_port":59336,"event_type":"alert","alert":{"severity":3,"signature_id":4,"rev":0,"signature":"aws:alert_established action","action":"blocked","category":""},"flow_id":278710129222792,"dest_ip":"54.242.115.112","proto":"TCP","verdict":{"action":"drop"},"http":{"hostname":"checkip.amazonaws.com","url":"/","http_user_agent":"curl/8.5.0","http_method":"GET","protocol":"HTTP/1.1","length":0},"dest_port":80,"pkt_src":"geneve encapsulation","timestamp":"2025-02-24T14:30:48.723575+0000","direction":"to_server"}} +{"index":{"_id":"6"}} +{"firewall_name":"use6-fw","availability_zone":"us-east-1a","event_timestamp":"1740407424","event":{"dns":{"query":[{"type":"query","id":49938,"rrname":"checkip.amazonaws.com","rrtype":"A","tx_id":0,"opcode":0}]},"app_proto":"dns","src_ip":"10.170.18.235","src_port":41655,"event_type":"alert","alert":{"severity":3,"signature_id":3,"rev":0,"signature":"aws:alert_established action","action":"blocked","category":""},"flow_id":125463154376723,"dest_ip":"8.8.8.8","proto":"UDP","verdict":{"action":"drop"},"dest_port":53,"pkt_src":"geneve encapsulation","timestamp":"2025-02-24T14:30:24.553499+0000","direction":"to_server"}} +{"index":{"_id":"7"}} +{"firewall_name":"use13-fw","availability_zone":"us-east-1a","event_timestamp":"1740407547","event":{"src_ip":"10.170.18.235","src_port":41655,"netflow":{"pkts":1,"bytes":90,"start":"2025-02-24T14:30:24.553499+0000","end":"2025-02-24T14:30:24.553499+0000","age":0,"min_ttl":63,"max_ttl":63},"event_type":"netflow","flow_id":125463154376723,"dest_ip":"8.8.8.8","proto":"UDP","app_proto":"dns","dest_port":53,"timestamp":"2025-02-24T14:32:27.288559+0000"}} +{"index":{"_id":"8"}} +{"firewall_name":"use7-fw","availability_zone":"us-east-1a","event_timestamp":"1740407564","event":{"tcp":{"tcp_flags":"1b","syn":true,"fin":true,"psh":true,"ack":true},"app_proto":"http","src_ip":"10.170.18.235","src_port":59336,"netflow":{"pkts":22,"bytes":2088,"start":"2025-02-24T14:30:48.720252+0000","end":"2025-02-24T14:31:42.291293+0000","age":54,"min_ttl":63,"max_ttl":63},"event_type":"netflow","flow_id":278710129222792,"dest_ip":"54.242.115.112","proto":"TCP","dest_port":80,"timestamp":"2025-02-24T14:32:44.925012+0000"}} +{"index":{"_id":"9"}} +{"firewall_name":"use8-fw","availability_zone":"us-east-1a","event_timestamp":"1740407403","event":{"icmp_type":8,"src_ip":"10.170.18.235","src_port":0,"event_type":"alert","alert":{"severity":3,"signature_id":2,"rev":0,"signature":"aws:alert_established action","action":"blocked","category":""},"flow_id":955854500244454,"dest_ip":"8.8.8.8","proto":"ICMP","verdict":{"action":"drop"},"icmp_code":0,"dest_port":0,"pkt_src":"geneve encapsulation","timestamp":"2025-02-24T14:30:03.681304+0000","direction":"to_server"}} +{"index":{"_id":"10"}} +{"firewall_name":"use9-fw","availability_zone":"us-east-1a","event_timestamp":"1740407532","event":{"src_ip":"10.170.18.235","icmp_type":8,"netflow":{"pkts":9,"bytes":756,"start":"2025-02-24T14:30:03.681304+0000","end":"2025-02-24T14:30:11.857480+0000","age":8,"min_ttl":63,"max_ttl":63},"event_type":"netflow","flow_id":955854500244454,"dest_ip":"8.8.8.8","proto":"ICMP","icmp_code":0,"app_proto":"unknown","timestamp":"2025-02-24T14:32:12.475216+0000"}} +{"index":{"_id":"11"}} 
+{"firewall_name":"NetworkFirewallSetup-firewall","availability_zone":"us-east-1a","event_timestamp":"1743199210","event":{"tx_id":0,"app_proto":"tls","src_ip":"10.2.1.120","src_port":46736,"event_type":"alert","alert":{"severity":1,"signature_id":5,"rev":1,"signature":"not matching any TLS allowlisted FQDNs","action":"blocked","category":""},"flow_id":686450381468061,"dest_ip":"52.216.211.88","proto":"TCP","verdict":{"action":"drop"},"tls":{"sni":"s3.us-east-1.amazonaws.com","version":"UNDETERMINED"},"dest_port":443,"pkt_src":"geneve encapsulation","timestamp":"2025-03-28T22:00:10.096649+0000","direction":"to_server"}} +{"index":{"_id":"12"}} +{"firewall_name":"NetworkFirewallSetup-firewall","availability_zone":"us-east-1a","event_timestamp":"1743196105","event":{"icmp_type":8,"src_ip":"51.158.113.168","src_port":0,"event_type":"alert","alert":{"severity":3,"signature_id":1,"rev":0,"signature":"","action":"allowed","category":""},"flow_id":552803747477431,"dest_ip":"10.2.1.120","proto":"ICMP","verdict":{"action":"alert"},"icmp_code":0,"dest_port":0,"pkt_src":"geneve encapsulation","timestamp":"2025-03-28T21:08:25.521925+0000","direction":"to_server"}} +{"index":{"_id":"13"}} +{"firewall_name":"NetworkFirewallSetup-firewall","availability_zone":"us-east-1a","event_timestamp":"1743126304","event":{"tcp":{"tcp_flags":"02","syn":true},"app_proto":"unknown","src_ip":"45.82.78.100","src_port":52610,"netflow":{"pkts":1,"bytes":44,"start":"2025-03-28T01:39:13.848296+0000","end":"2025-03-28T01:39:13.848296+0000","age":400,"min_ttl":237,"max_ttl":237},"event_type":"netflow","flow_id":547181126614785,"dest_ip":"10.2.1.120","proto":"TCP","dest_port":8085,"timestamp":"2025-03-28T01:45:04.462478+0000"}} +{"index":{"_id":"14"}} +{"firewall_name":"NetworkFirewallSetup-firewall","availability_zone":"us-east-1a","event_timestamp":"1743126311","event":{"tcp":{"tcp_flags":"02","syn":true},"app_proto":"unknown","src_ip":"20.65.193.116","src_port":45550,"netflow":{"pkts":1,"bytes":40,"start":"2025-03-28T01:39:20.417619+0000","end":"2025-03-28T01:39:20.417619+0000","age":500,"min_ttl":242,"max_ttl":242},"event_type":"netflow","flow_id":104811913693211,"dest_ip":"10.2.1.120","proto":"TCP","dest_port":1433,"timestamp":"2025-03-28T01:45:11.086998+0000"}} +{"index":{"_id":"15"}} +{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740409500","event":{"tcp":{"tcp_flags":"15","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.15.15","src_port":8015,"netflow":{"pkts":16,"bytes":650,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969423,"dest_ip":"172.16.15.15","proto":"TCP","dest_port":458,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"16"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740409600","event":{"tcp":{"tcp_flags":"16","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.16.16","src_port":8016,"netflow":{"pkts":17,"bytes":660,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969424,"dest_ip":"172.16.16.16","proto":"TCP","dest_port":459,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"17"}} 
+{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740409700","event":{"tcp":{"tcp_flags":"17","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.17.17","src_port":8017,"netflow":{"pkts":18,"bytes":670,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969425,"dest_ip":"172.16.17.17","proto":"TCP","dest_port":460,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"18"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740409800","event":{"tcp":{"tcp_flags":"18","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.18.18","src_port":8018,"netflow":{"pkts":19,"bytes":680,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969426,"dest_ip":"172.16.18.18","proto":"TCP","dest_port":461,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"19"}} +{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740409900","event":{"tcp":{"tcp_flags":"19","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.19.19","src_port":8019,"netflow":{"pkts":20,"bytes":690,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969427,"dest_ip":"172.16.19.19","proto":"TCP","dest_port":462,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"20"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740410000","event":{"tcp":{"tcp_flags":"20","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.20.20","src_port":8020,"netflow":{"pkts":21,"bytes":700,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969428,"dest_ip":"172.16.20.20","proto":"TCP","dest_port":463,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"21"}} +{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740410100","event":{"tcp":{"tcp_flags":"21","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.21.21","src_port":8021,"netflow":{"pkts":22,"bytes":710,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969429,"dest_ip":"172.16.21.21","proto":"TCP","dest_port":464,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"22"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740410200","event":{"tcp":{"tcp_flags":"22","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.22.22","src_port":8022,"netflow":{"pkts":23,"bytes":720,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969430,"dest_ip":"172.16.22.22","proto":"TCP","dest_port":465,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"23"}} 
+{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740410300","event":{"tcp":{"tcp_flags":"23","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.23.23","src_port":8023,"netflow":{"pkts":24,"bytes":730,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969431,"dest_ip":"172.16.23.23","proto":"TCP","dest_port":466,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"24"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740410400","event":{"tcp":{"tcp_flags":"24","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.24.24","src_port":8024,"netflow":{"pkts":25,"bytes":740,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969432,"dest_ip":"172.16.24.24","proto":"TCP","dest_port":467,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"25"}} +{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740410500","event":{"tcp":{"tcp_flags":"25","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.25.25","src_port":8025,"netflow":{"pkts":26,"bytes":750,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969433,"dest_ip":"172.16.25.25","proto":"TCP","dest_port":468,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"26"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740410600","event":{"tcp":{"tcp_flags":"26","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.26.26","src_port":8026,"netflow":{"pkts":27,"bytes":760,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969434,"dest_ip":"172.16.26.26","proto":"TCP","dest_port":469,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"27"}} +{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740410700","event":{"tcp":{"tcp_flags":"27","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.27.27","src_port":8027,"netflow":{"pkts":28,"bytes":770,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969435,"dest_ip":"172.16.27.27","proto":"TCP","dest_port":470,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"28"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740410800","event":{"tcp":{"tcp_flags":"28","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.28.28","src_port":8028,"netflow":{"pkts":29,"bytes":780,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969436,"dest_ip":"172.16.28.28","proto":"TCP","dest_port":471,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"29"}} 
+{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740410900","event":{"tcp":{"tcp_flags":"29","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.29.29","src_port":8029,"netflow":{"pkts":30,"bytes":790,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969437,"dest_ip":"172.16.29.29","proto":"TCP","dest_port":472,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"30"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740411000","event":{"tcp":{"tcp_flags":"30","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.30.30","src_port":8030,"netflow":{"pkts":31,"bytes":800,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969438,"dest_ip":"172.16.30.30","proto":"TCP","dest_port":473,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"31"}} +{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740411100","event":{"tcp":{"tcp_flags":"31","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.31.31","src_port":8031,"netflow":{"pkts":32,"bytes":810,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969439,"dest_ip":"172.16.31.31","proto":"TCP","dest_port":474,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"32"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740411200","event":{"tcp":{"tcp_flags":"0","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.32.32","src_port":8032,"netflow":{"pkts":33,"bytes":820,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969440,"dest_ip":"172.16.32.32","proto":"TCP","dest_port":475,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"33"}} +{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740411300","event":{"tcp":{"tcp_flags":"1","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.33.33","src_port":8033,"netflow":{"pkts":34,"bytes":830,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969441,"dest_ip":"172.16.33.33","proto":"TCP","dest_port":476,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"34"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740411400","event":{"tcp":{"tcp_flags":"2","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.34.34","src_port":8034,"netflow":{"pkts":35,"bytes":840,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969442,"dest_ip":"172.16.34.34","proto":"TCP","dest_port":477,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"35"}} 
+{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740411500","event":{"tcp":{"tcp_flags":"3","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.35.35","src_port":8035,"netflow":{"pkts":36,"bytes":850,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969443,"dest_ip":"172.16.35.35","proto":"TCP","dest_port":478,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"36"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740411600","event":{"tcp":{"tcp_flags":"4","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.36.36","src_port":8036,"netflow":{"pkts":37,"bytes":860,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969444,"dest_ip":"172.16.36.36","proto":"TCP","dest_port":479,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"37"}} +{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740411700","event":{"tcp":{"tcp_flags":"5","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.37.37","src_port":8037,"netflow":{"pkts":38,"bytes":870,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969445,"dest_ip":"172.16.37.37","proto":"TCP","dest_port":480,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"38"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740411800","event":{"tcp":{"tcp_flags":"6","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.38.38","src_port":8038,"netflow":{"pkts":39,"bytes":880,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969446,"dest_ip":"172.16.38.38","proto":"TCP","dest_port":481,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"39"}} +{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740411900","event":{"tcp":{"tcp_flags":"7","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.39.39","src_port":8039,"netflow":{"pkts":40,"bytes":890,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969447,"dest_ip":"172.16.39.39","proto":"TCP","dest_port":482,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"40"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740412000","event":{"tcp":{"tcp_flags":"8","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.40.40","src_port":8040,"netflow":{"pkts":41,"bytes":900,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969448,"dest_ip":"172.16.40.40","proto":"TCP","dest_port":483,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"41"}} 
+{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740412100","event":{"tcp":{"tcp_flags":"9","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.41.41","src_port":8041,"netflow":{"pkts":42,"bytes":910,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969449,"dest_ip":"172.16.41.41","proto":"TCP","dest_port":484,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"42"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740412200","event":{"tcp":{"tcp_flags":"10","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.42.42","src_port":8042,"netflow":{"pkts":43,"bytes":920,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969450,"dest_ip":"172.16.42.42","proto":"TCP","dest_port":485,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"43"}} +{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740412300","event":{"tcp":{"tcp_flags":"11","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.43.43","src_port":8043,"netflow":{"pkts":44,"bytes":930,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969451,"dest_ip":"172.16.43.43","proto":"TCP","dest_port":486,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"44"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740412400","event":{"tcp":{"tcp_flags":"12","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.44.44","src_port":8044,"netflow":{"pkts":45,"bytes":940,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969452,"dest_ip":"172.16.44.44","proto":"TCP","dest_port":487,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"45"}} +{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740412500","event":{"tcp":{"tcp_flags":"13","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.45.45","src_port":8045,"netflow":{"pkts":46,"bytes":950,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969453,"dest_ip":"172.16.45.45","proto":"TCP","dest_port":488,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"46"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740412600","event":{"tcp":{"tcp_flags":"14","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.46.46","src_port":8046,"netflow":{"pkts":47,"bytes":960,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969454,"dest_ip":"172.16.46.46","proto":"TCP","dest_port":489,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"47"}} 
+{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740412700","event":{"tcp":{"tcp_flags":"15","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.47.47","src_port":8047,"netflow":{"pkts":48,"bytes":970,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969455,"dest_ip":"172.16.47.47","proto":"TCP","dest_port":490,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"48"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740412800","event":{"tcp":{"tcp_flags":"16","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.48.48","src_port":8048,"netflow":{"pkts":49,"bytes":980,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969456,"dest_ip":"172.16.48.48","proto":"TCP","dest_port":491,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"49"}} +{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740412900","event":{"tcp":{"tcp_flags":"17","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.49.49","src_port":8049,"netflow":{"pkts":50,"bytes":990,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969457,"dest_ip":"172.16.49.49","proto":"TCP","dest_port":492,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"50"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740413000","event":{"tcp":{"tcp_flags":"18","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.50.50","src_port":8050,"netflow":{"pkts":1,"bytes":1000,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969458,"dest_ip":"172.16.50.50","proto":"TCP","dest_port":493,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"51"}} +{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740413100","event":{"tcp":{"tcp_flags":"19","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.51.51","src_port":8051,"netflow":{"pkts":2,"bytes":1010,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969459,"dest_ip":"172.16.51.51","proto":"TCP","dest_port":494,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"52"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740413200","event":{"tcp":{"tcp_flags":"20","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.52.52","src_port":8052,"netflow":{"pkts":3,"bytes":1020,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969460,"dest_ip":"172.16.52.52","proto":"TCP","dest_port":495,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"53"}} 
+{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740413300","event":{"tcp":{"tcp_flags":"21","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.53.53","src_port":8053,"netflow":{"pkts":4,"bytes":1030,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969461,"dest_ip":"172.16.53.53","proto":"TCP","dest_port":496,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"54"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740413400","event":{"tcp":{"tcp_flags":"22","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.54.54","src_port":8054,"netflow":{"pkts":5,"bytes":1040,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969462,"dest_ip":"172.16.54.54","proto":"TCP","dest_port":497,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"55"}} +{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740413500","event":{"tcp":{"tcp_flags":"23","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.55.55","src_port":8055,"netflow":{"pkts":6,"bytes":1050,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969463,"dest_ip":"172.16.55.55","proto":"TCP","dest_port":498,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"56"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740413600","event":{"tcp":{"tcp_flags":"24","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.56.56","src_port":8056,"netflow":{"pkts":7,"bytes":1060,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969464,"dest_ip":"172.16.56.56","proto":"TCP","dest_port":499,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"57"}} +{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740413700","event":{"tcp":{"tcp_flags":"25","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.57.57","src_port":8057,"netflow":{"pkts":8,"bytes":1070,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969465,"dest_ip":"172.16.57.57","proto":"TCP","dest_port":500,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"58"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740413800","event":{"tcp":{"tcp_flags":"26","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.58.58","src_port":8058,"netflow":{"pkts":9,"bytes":1080,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969466,"dest_ip":"172.16.58.58","proto":"TCP","dest_port":501,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"59"}} 
+{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740413900","event":{"tcp":{"tcp_flags":"27","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.59.59","src_port":8059,"netflow":{"pkts":10,"bytes":1090,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969467,"dest_ip":"172.16.59.59","proto":"TCP","dest_port":502,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"60"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740414000","event":{"tcp":{"tcp_flags":"28","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.60.60","src_port":8060,"netflow":{"pkts":11,"bytes":1100,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969468,"dest_ip":"172.16.60.60","proto":"TCP","dest_port":503,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"61"}} +{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740414100","event":{"tcp":{"tcp_flags":"29","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.61.61","src_port":8061,"netflow":{"pkts":12,"bytes":1110,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969469,"dest_ip":"172.16.61.61","proto":"TCP","dest_port":504,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"62"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740414200","event":{"tcp":{"tcp_flags":"30","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.62.62","src_port":8062,"netflow":{"pkts":13,"bytes":1120,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969470,"dest_ip":"172.16.62.62","proto":"TCP","dest_port":505,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"63"}} +{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740414300","event":{"tcp":{"tcp_flags":"31","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.63.63","src_port":8063,"netflow":{"pkts":14,"bytes":1130,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969471,"dest_ip":"172.16.63.63","proto":"TCP","dest_port":506,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"64"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740414400","event":{"tcp":{"tcp_flags":"0","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.64.64","src_port":8064,"netflow":{"pkts":15,"bytes":1140,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969472,"dest_ip":"172.16.64.64","proto":"TCP","dest_port":507,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"65"}} 
+{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740414500","event":{"tcp":{"tcp_flags":"1","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.65.65","src_port":8065,"netflow":{"pkts":16,"bytes":1150,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969473,"dest_ip":"172.16.65.65","proto":"TCP","dest_port":508,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"66"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740414600","event":{"tcp":{"tcp_flags":"2","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.66.66","src_port":8066,"netflow":{"pkts":17,"bytes":1160,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969474,"dest_ip":"172.16.66.66","proto":"TCP","dest_port":509,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"67"}} +{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740414700","event":{"tcp":{"tcp_flags":"3","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.67.67","src_port":8067,"netflow":{"pkts":18,"bytes":1170,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969475,"dest_ip":"172.16.67.67","proto":"TCP","dest_port":510,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"68"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740414800","event":{"tcp":{"tcp_flags":"4","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.68.68","src_port":8068,"netflow":{"pkts":19,"bytes":1180,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969476,"dest_ip":"172.16.68.68","proto":"TCP","dest_port":511,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"69"}} +{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740414900","event":{"tcp":{"tcp_flags":"5","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.69.69","src_port":8069,"netflow":{"pkts":20,"bytes":1190,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969477,"dest_ip":"172.16.69.69","proto":"TCP","dest_port":512,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"70"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740415000","event":{"tcp":{"tcp_flags":"6","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.70.70","src_port":8070,"netflow":{"pkts":21,"bytes":1200,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969478,"dest_ip":"172.16.70.70","proto":"TCP","dest_port":513,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"71"}} 
+{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740415100","event":{"tcp":{"tcp_flags":"7","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.71.71","src_port":8071,"netflow":{"pkts":22,"bytes":1210,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969479,"dest_ip":"172.16.71.71","proto":"TCP","dest_port":514,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"72"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740415200","event":{"tcp":{"tcp_flags":"8","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.72.72","src_port":8072,"netflow":{"pkts":23,"bytes":1220,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969480,"dest_ip":"172.16.72.72","proto":"TCP","dest_port":515,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"73"}} +{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740415300","event":{"tcp":{"tcp_flags":"9","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.73.73","src_port":8073,"netflow":{"pkts":24,"bytes":1230,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969481,"dest_ip":"172.16.73.73","proto":"TCP","dest_port":516,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"74"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740415400","event":{"tcp":{"tcp_flags":"10","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.74.74","src_port":8074,"netflow":{"pkts":25,"bytes":1240,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969482,"dest_ip":"172.16.74.74","proto":"TCP","dest_port":517,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"75"}} +{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740415500","event":{"tcp":{"tcp_flags":"11","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.75.75","src_port":8075,"netflow":{"pkts":26,"bytes":1250,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969483,"dest_ip":"172.16.75.75","proto":"TCP","dest_port":518,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"76"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740415600","event":{"tcp":{"tcp_flags":"12","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.76.76","src_port":8076,"netflow":{"pkts":27,"bytes":1260,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969484,"dest_ip":"172.16.76.76","proto":"TCP","dest_port":519,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"77"}} 
+{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740415700","event":{"tcp":{"tcp_flags":"13","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.77.77","src_port":8077,"netflow":{"pkts":28,"bytes":1270,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969485,"dest_ip":"172.16.77.77","proto":"TCP","dest_port":520,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"78"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740415800","event":{"tcp":{"tcp_flags":"14","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.78.78","src_port":8078,"netflow":{"pkts":29,"bytes":1280,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969486,"dest_ip":"172.16.78.78","proto":"TCP","dest_port":521,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"79"}} +{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740415900","event":{"tcp":{"tcp_flags":"15","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.79.79","src_port":8079,"netflow":{"pkts":30,"bytes":1290,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969487,"dest_ip":"172.16.79.79","proto":"TCP","dest_port":522,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"80"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740416000","event":{"tcp":{"tcp_flags":"16","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.80.80","src_port":8080,"netflow":{"pkts":31,"bytes":1300,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969488,"dest_ip":"172.16.80.80","proto":"TCP","dest_port":523,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"81"}} +{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740416100","event":{"tcp":{"tcp_flags":"17","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.81.81","src_port":8081,"netflow":{"pkts":32,"bytes":1310,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969489,"dest_ip":"172.16.81.81","proto":"TCP","dest_port":524,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"82"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740416200","event":{"tcp":{"tcp_flags":"18","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.82.82","src_port":8082,"netflow":{"pkts":33,"bytes":1320,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969490,"dest_ip":"172.16.82.82","proto":"TCP","dest_port":525,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"83"}} 
+{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740416300","event":{"tcp":{"tcp_flags":"19","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.83.83","src_port":8083,"netflow":{"pkts":34,"bytes":1330,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969491,"dest_ip":"172.16.83.83","proto":"TCP","dest_port":526,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"84"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740416400","event":{"tcp":{"tcp_flags":"20","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.84.84","src_port":8084,"netflow":{"pkts":35,"bytes":1340,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969492,"dest_ip":"172.16.84.84","proto":"TCP","dest_port":527,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"85"}} +{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740416500","event":{"tcp":{"tcp_flags":"21","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.85.85","src_port":8085,"netflow":{"pkts":36,"bytes":1350,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969493,"dest_ip":"172.16.85.85","proto":"TCP","dest_port":528,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"86"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740416600","event":{"tcp":{"tcp_flags":"22","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.86.86","src_port":8086,"netflow":{"pkts":37,"bytes":1360,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969494,"dest_ip":"172.16.86.86","proto":"TCP","dest_port":529,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"87"}} +{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740416700","event":{"tcp":{"tcp_flags":"23","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.87.87","src_port":8087,"netflow":{"pkts":38,"bytes":1370,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969495,"dest_ip":"172.16.87.87","proto":"TCP","dest_port":530,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"88"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740416800","event":{"tcp":{"tcp_flags":"24","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.88.88","src_port":8088,"netflow":{"pkts":39,"bytes":1380,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969496,"dest_ip":"172.16.88.88","proto":"TCP","dest_port":531,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"89"}} 
+{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740416900","event":{"tcp":{"tcp_flags":"25","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.89.89","src_port":8089,"netflow":{"pkts":40,"bytes":1390,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969497,"dest_ip":"172.16.89.89","proto":"TCP","dest_port":532,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"90"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740417000","event":{"tcp":{"tcp_flags":"26","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.90.90","src_port":8090,"netflow":{"pkts":41,"bytes":1400,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969498,"dest_ip":"172.16.90.90","proto":"TCP","dest_port":533,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"91"}} +{"firewall_name":"fw-1","availability_zone":"us-east-1a","event_timestamp":"1740417100","event":{"tcp":{"tcp_flags":"27","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.91.91","src_port":8091,"netflow":{"pkts":42,"bytes":1410,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969499,"dest_ip":"172.16.91.91","proto":"TCP","dest_port":534,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"92"}} +{"firewall_name":"fw-2","availability_zone":"us-east-1a","event_timestamp":"1740417200","event":{"tcp":{"tcp_flags":"28","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.92.92","src_port":8092,"netflow":{"pkts":43,"bytes":1420,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969500,"dest_ip":"172.16.92.92","proto":"TCP","dest_port":535,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"93"}} +{"firewall_name":"fw-3","availability_zone":"us-east-1a","event_timestamp":"1740417300","event":{"tcp":{"tcp_flags":"29","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.93.93","src_port":8093,"netflow":{"pkts":44,"bytes":1430,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969501,"dest_ip":"172.16.93.93","proto":"TCP","dest_port":536,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"94"}} +{"firewall_name":"fw-4","availability_zone":"us-east-1a","event_timestamp":"1740417400","event":{"tcp":{"tcp_flags":"30","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.94.94","src_port":8094,"netflow":{"pkts":45,"bytes":1440,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969502,"dest_ip":"172.16.94.94","proto":"TCP","dest_port":537,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"95"}} 
+{"firewall_name":"fw-5","availability_zone":"us-east-1a","event_timestamp":"1740417500","event":{"tcp":{"tcp_flags":"31","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.95.95","src_port":8095,"netflow":{"pkts":46,"bytes":1450,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969503,"dest_ip":"172.16.95.95","proto":"TCP","dest_port":538,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"96"}} +{"firewall_name":"fw-6","availability_zone":"us-east-1a","event_timestamp":"1740417600","event":{"tcp":{"tcp_flags":"0","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.96.96","src_port":8096,"netflow":{"pkts":47,"bytes":1460,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969504,"dest_ip":"172.16.96.96","proto":"TCP","dest_port":539,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"97"}} +{"firewall_name":"fw-7","availability_zone":"us-east-1a","event_timestamp":"1740417700","event":{"tcp":{"tcp_flags":"1","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.97.97","src_port":8097,"netflow":{"pkts":48,"bytes":1470,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969505,"dest_ip":"172.16.97.97","proto":"TCP","dest_port":540,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"98"}} +{"firewall_name":"fw-8","availability_zone":"us-east-1a","event_timestamp":"1740417800","event":{"tcp":{"tcp_flags":"2","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.98.98","src_port":8098,"netflow":{"pkts":49,"bytes":1480,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969506,"dest_ip":"172.16.98.98","proto":"TCP","dest_port":541,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"99"}} +{"firewall_name":"fw-9","availability_zone":"us-east-1a","event_timestamp":"1740417900","event":{"tcp":{"tcp_flags":"3","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.99.99","src_port":8099,"netflow":{"pkts":50,"bytes":1490,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969507,"dest_ip":"172.16.99.99","proto":"TCP","dest_port":542,"timestamp":"2025-02-24T14:02:00.000000+0000"}} +{"index":{"_id":"100"}} +{"firewall_name":"fw-0","availability_zone":"us-east-1a","event_timestamp":"1740418000","event":{"tcp":{"tcp_flags":"4","syn":true,"fin":false,"ack":true},"app_proto":"http","src_ip":"10.0.100.0","src_port":8100,"netflow":{"pkts":1,"bytes":1500,"start":"2025-02-24T14:00:00.000000+0000","end":"2025-02-24T14:01:00.000000+0000","age":60,"min_ttl":64,"max_ttl":64},"event_type":"netflow","flow_id":16402963969508,"dest_ip":"172.16.100.0","proto":"TCP","dest_port":543,"timestamp":"2025-02-24T14:02:00.000000+0000"}} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/vpc_logs.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/vpc_logs.json new file mode 100644 index 00000000000..15da557a0c1 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/vpc_logs.json @@ -0,0 
+1,200 @@ +{"index": {"_id": "1"}} +{"start": "2025-09-21T06:53:40.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.27.251", "dstaddr": "10.0.239.177", "protocol": "6", "total_count": 9, "bytes": 37534, "packets": 49} +{"index": {"_id": "2"}} +{"start": "2025-05-24T21:57:29.858575+0000", "action": "ACCEPT", "srcaddr": "182.53.30.77", "dstaddr": "10.0.11.144", "protocol": "6", "total_count": 9, "bytes": 192355, "packets": 155} +{"index": {"_id": "3"}} +{"start": "2025-08-21T08:12:46.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.241.127", "dstaddr": "40.106.220.213", "protocol": "6", "total_count": 9, "bytes": 31605, "packets": 35} +{"index": {"_id": "4"}} +{"start": "2025-05-13T08:44:40.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.206.143", "dstaddr": "135.27.167.55", "protocol": "1", "total_count": 2, "bytes": 730, "packets": 10} +{"index": {"_id": "5"}} +{"start": "2025-10-27T16:25:42.858575+0000", "action": "ACCEPT", "srcaddr": "118.117.89.55", "dstaddr": "10.0.211.246", "protocol": "6", "total_count": 9, "bytes": 67196, "packets": 107} +{"index": {"_id": "6"}} +{"start": "2025-07-28T21:06:03.858575+0000", "action": "REJECT", "srcaddr": "10.0.56.120", "dstaddr": "10.0.156.146", "protocol": "6", "total_count": 1, "bytes": 52, "packets": 1} +{"index": {"_id": "7"}} +{"start": "2025-07-07T18:16:57.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.213.143", "dstaddr": "103.188.15.206", "protocol": "6", "total_count": 8, "bytes": 32968, "packets": 26} +{"index": {"_id": "8"}} +{"start": "2025-08-16T20:27:22.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.74.231", "dstaddr": "10.0.195.244", "protocol": "6", "total_count": 5, "bytes": 64885, "packets": 95} +{"index": {"_id": "9"}} +{"start": "2025-06-08T04:24:13.858575+0000", "action": "ACCEPT", "srcaddr": "39.40.182.87", "dstaddr": "10.0.3.220", "protocol": "6", "total_count": 6, "bytes": 176391, "packets": 141} +{"index": {"_id": "10"}} +{"start": "2025-09-18T18:09:58.858575+0000", "action": "ACCEPT", "srcaddr": "213.227.231.57", "dstaddr": "10.0.74.110", "protocol": "6", "total_count": 11, "bytes": 182055, "packets": 159} +{"index": {"_id": "11"}} +{"start": "2025-10-01T11:37:46.858575+0000", "action": "ACCEPT", "srcaddr": "120.70.95.174", "dstaddr": "10.0.92.100", "protocol": "17", "total_count": 14, "bytes": 25335, "packets": 45} +{"index": {"_id": "12"}} +{"start": "2025-07-07T00:38:16.858575+0000", "action": "REJECT", "srcaddr": "10.0.210.196", "dstaddr": "10.0.164.70", "protocol": "17", "total_count": 1, "bytes": 385, "packets": 5} +{"index": {"_id": "13"}} +{"start": "2025-05-31T15:08:25.858575+0000", "action": "REJECT", "srcaddr": "10.0.194.48", "dstaddr": "10.0.252.82", "protocol": "6", "total_count": 1, "bytes": 350, "packets": 5} +{"index": {"_id": "14"}} +{"start": "2025-07-03T03:27:57.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.67.31", "dstaddr": "10.0.215.177", "protocol": "6", "total_count": 2, "bytes": 64005, "packets": 85} +{"index": {"_id": "15"}} +{"start": "2025-08-11T09:50:23.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.230.218", "dstaddr": "10.0.166.73", "protocol": "6", "total_count": 14, "bytes": 58140, "packets": 76} +{"index": {"_id": "16"}} +{"start": "2025-06-13T06:31:10.858575+0000", "action": "ACCEPT", "srcaddr": "6.186.106.13", "dstaddr": "10.0.194.75", "protocol": "6", "total_count": 10, "bytes": 210396, "packets": 197} +{"index": {"_id": "17"}} +{"start": "2025-07-12T00:58:59.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.221.154", "dstaddr": "221.136.21.103", "protocol": 
"17", "total_count": 8, "bytes": 8386, "packets": 14} +{"index": {"_id": "18"}} +{"start": "2025-10-08T01:35:18.858575+0000", "action": "REJECT", "srcaddr": "10.0.126.80", "dstaddr": "199.233.17.192", "protocol": "6", "total_count": 1, "bytes": 128, "packets": 2} +{"index": {"_id": "19"}} +{"start": "2025-10-22T09:29:54.858575+0000", "action": "REJECT", "srcaddr": "10.0.96.32", "dstaddr": "218.167.81.66", "protocol": "6", "total_count": 1, "bytes": 220, "packets": 4} +{"index": {"_id": "20"}} +{"start": "2025-06-18T04:37:11.858575+0000", "action": "REJECT", "srcaddr": "10.0.168.154", "dstaddr": "147.103.27.169", "protocol": "6", "total_count": 1, "bytes": 420, "packets": 5} +{"index": {"_id": "21"}} +{"start": "2025-05-09T07:25:29.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.14.9", "dstaddr": "10.0.83.167", "protocol": "6", "total_count": 1, "bytes": 175820, "packets": 118} +{"index": {"_id": "22"}} +{"start": "2025-07-19T11:07:03.858575+0000", "action": "ACCEPT", "srcaddr": "45.210.22.48", "dstaddr": "10.0.161.180", "protocol": "6", "total_count": 12, "bytes": 70006, "packets": 58} +{"index": {"_id": "23"}} +{"start": "2025-06-21T11:42:48.858575+0000", "action": "ACCEPT", "srcaddr": "202.122.233.69", "dstaddr": "10.0.229.224", "protocol": "6", "total_count": 6, "bytes": 59852, "packets": 52} +{"index": {"_id": "24"}} +{"start": "2025-06-02T23:13:40.858575+0000", "action": "ACCEPT", "srcaddr": "1.24.59.183", "dstaddr": "10.0.60.144", "protocol": "6", "total_count": 5, "bytes": 48200, "packets": 50} +{"index": {"_id": "25"}} +{"start": "2025-06-02T05:07:52.858575+0000", "action": "ACCEPT", "srcaddr": "139.64.55.38", "dstaddr": "10.0.138.175", "protocol": "17", "total_count": 6, "bytes": 11856, "packets": 39} +{"index": {"_id": "26"}} +{"start": "2025-09-05T04:28:24.858575+0000", "action": "ACCEPT", "srcaddr": "214.53.147.28", "dstaddr": "10.0.149.151", "protocol": "17", "total_count": 11, "bytes": 5307, "packets": 29} +{"index": {"_id": "27"}} +{"start": "2025-09-19T03:49:51.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.231.176", "dstaddr": "34.55.235.91", "protocol": "6", "total_count": 6, "bytes": 149625, "packets": 171} +{"index": {"_id": "28"}} +{"start": "2025-07-16T08:06:05.858575+0000", "action": "ACCEPT", "srcaddr": "219.195.109.54", "dstaddr": "10.0.147.33", "protocol": "6", "total_count": 12, "bytes": 119064, "packets": 82} +{"index": {"_id": "29"}} +{"start": "2025-08-01T20:39:23.858575+0000", "action": "ACCEPT", "srcaddr": "204.252.1.34", "dstaddr": "10.0.2.231", "protocol": "6", "total_count": 6, "bytes": 44128, "packets": 56} +{"index": {"_id": "30"}} +{"start": "2025-10-12T18:22:52.858575+0000", "action": "ACCEPT", "srcaddr": "185.181.124.51", "dstaddr": "10.0.194.43", "protocol": "47", "total_count": 9, "bytes": 411, "packets": 3} +{"index": {"_id": "31"}} +{"start": "2025-10-29T03:40:55.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.165.194", "dstaddr": "223.252.77.226", "protocol": "6", "total_count": 11, "bytes": 214512, "packets": 164} +{"index": {"_id": "32"}} +{"start": "2025-05-22T05:06:51.858575+0000", "action": "ACCEPT", "srcaddr": "30.193.135.22", "dstaddr": "10.0.167.74", "protocol": "6", "total_count": 15, "bytes": 183353, "packets": 181} +{"index": {"_id": "33"}} +{"start": "2025-08-16T07:32:40.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.154.101", "dstaddr": "10.0.116.210", "protocol": "6", "total_count": 12, "bytes": 158235, "packets": 137} +{"index": {"_id": "34"}} +{"start": "2025-10-25T04:13:11.858575+0000", "action": "ACCEPT", 
"srcaddr": "180.211.253.62", "dstaddr": "10.0.148.76", "protocol": "17", "total_count": 8, "bytes": 17748, "packets": 34} +{"index": {"_id": "35"}} +{"start": "2025-10-06T14:32:09.858575+0000", "action": "ACCEPT", "srcaddr": "175.2.169.160", "dstaddr": "10.0.149.106", "protocol": "6", "total_count": 14, "bytes": 13806, "packets": 26} +{"index": {"_id": "36"}} +{"start": "2025-08-22T13:59:28.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.163.148", "dstaddr": "167.14.167.240", "protocol": "47", "total_count": 1, "bytes": 1755, "packets": 9} +{"index": {"_id": "37"}} +{"start": "2025-06-06T02:57:13.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.170.33", "dstaddr": "201.127.20.238", "protocol": "17", "total_count": 1, "bytes": 6129, "packets": 9} +{"index": {"_id": "38"}} +{"start": "2025-05-23T03:10:24.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.99.147", "dstaddr": "182.58.134.190", "protocol": "6", "total_count": 3, "bytes": 94656, "packets": 174} +{"index": {"_id": "39"}} +{"start": "2025-06-18T02:53:38.858575+0000", "action": "ACCEPT", "srcaddr": "202.158.178.15", "dstaddr": "10.0.25.187", "protocol": "6", "total_count": 3, "bytes": 113520, "packets": 129} +{"index": {"_id": "40"}} +{"start": "2025-08-01T01:27:16.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.8.9", "dstaddr": "10.0.160.34", "protocol": "17", "total_count": 14, "bytes": 17501, "packets": 37} +{"index": {"_id": "41"}} +{"start": "2025-08-26T17:30:21.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.249.79", "dstaddr": "10.0.163.35", "protocol": "6", "total_count": 2, "bytes": 118000, "packets": 80} +{"index": {"_id": "42"}} +{"start": "2025-05-16T04:06:52.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.177.109", "dstaddr": "10.0.56.203", "protocol": "6", "total_count": 10, "bytes": 34371, "packets": 67} +{"index": {"_id": "43"}} +{"start": "2025-06-30T11:03:56.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.230.242", "dstaddr": "10.0.163.246", "protocol": "6", "total_count": 2, "bytes": 116459, "packets": 127} +{"index": {"_id": "44"}} +{"start": "2025-10-15T12:04:50.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.247.123", "dstaddr": "10.0.248.107", "protocol": "17", "total_count": 13, "bytes": 1881, "packets": 11} +{"index": {"_id": "45"}} +{"start": "2025-10-13T16:09:54.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.140.174", "dstaddr": "150.160.32.84", "protocol": "6", "total_count": 10, "bytes": 30910, "packets": 22} +{"index": {"_id": "46"}} +{"start": "2025-10-19T21:25:27.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.163.249", "dstaddr": "120.67.35.74", "protocol": "6", "total_count": 5, "bytes": 187200, "packets": 200} +{"index": {"_id": "47"}} +{"start": "2025-05-21T11:13:18.858575+0000", "action": "ACCEPT", "srcaddr": "169.225.43.124", "dstaddr": "10.0.97.193", "protocol": "6", "total_count": 1, "bytes": 27650, "packets": 50} +{"index": {"_id": "48"}} +{"start": "2025-07-06T17:18:14.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.202.93", "dstaddr": "206.209.120.252", "protocol": "17", "total_count": 11, "bytes": 10656, "packets": 18} +{"index": {"_id": "49"}} +{"start": "2025-06-11T13:36:41.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.215.117", "dstaddr": "10.0.199.250", "protocol": "6", "total_count": 15, "bytes": 52542, "packets": 42} +{"index": {"_id": "50"}} +{"start": "2025-08-13T14:42:17.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.194.195", "dstaddr": "47.145.111.246", "protocol": "6", "total_count": 15, "bytes": 77113, "packets": 59} +{"index": {"_id": 
"51"}} +{"start": "2025-08-18T01:26:41.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.143.105", "dstaddr": "223.146.46.238", "protocol": "47", "total_count": 5, "bytes": 1764, "packets": 12} +{"index": {"_id": "52"}} +{"start": "2025-06-20T00:54:27.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.67.176", "dstaddr": "118.124.149.78", "protocol": "6", "total_count": 1, "bytes": 92168, "packets": 164} +{"index": {"_id": "53"}} +{"start": "2025-05-21T06:25:29.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.46.117", "dstaddr": "132.210.20.46", "protocol": "50", "total_count": 2, "bytes": 140, "packets": 2} +{"index": {"_id": "54"}} +{"start": "2025-05-18T15:18:33.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.161.136", "dstaddr": "218.219.227.80", "protocol": "6", "total_count": 3, "bytes": 50680, "packets": 40} +{"index": {"_id": "55"}} +{"start": "2025-05-09T10:50:30.858575+0000", "action": "ACCEPT", "srcaddr": "61.14.212.211", "dstaddr": "10.0.42.118", "protocol": "17", "total_count": 12, "bytes": 11076, "packets": 39} +{"index": {"_id": "56"}} +{"start": "2025-05-16T07:22:23.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.132.168", "dstaddr": "10.0.107.6", "protocol": "6", "total_count": 3, "bytes": 10878, "packets": 21} +{"index": {"_id": "57"}} +{"start": "2025-06-16T09:32:18.858575+0000", "action": "ACCEPT", "srcaddr": "174.146.45.248", "dstaddr": "10.0.16.145", "protocol": "6", "total_count": 2, "bytes": 163989, "packets": 137} +{"index": {"_id": "58"}} +{"start": "2025-05-30T01:59:37.858575+0000", "action": "ACCEPT", "srcaddr": "192.228.108.151", "dstaddr": "10.0.127.142", "protocol": "17", "total_count": 5, "bytes": 114, "packets": 1} +{"index": {"_id": "59"}} +{"start": "2025-09-19T15:41:12.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.77.5", "dstaddr": "213.39.134.186", "protocol": "17", "total_count": 12, "bytes": 9432, "packets": 24} +{"index": {"_id": "60"}} +{"start": "2025-07-12T15:05:15.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.91.27", "dstaddr": "11.111.108.48", "protocol": "6", "total_count": 1, "bytes": 259776, "packets": 198} +{"index": {"_id": "61"}} +{"start": "2025-09-30T19:06:29.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.101.123", "dstaddr": "10.0.231.202", "protocol": "6", "total_count": 2, "bytes": 75552, "packets": 96} +{"index": {"_id": "62"}} +{"start": "2025-05-09T01:50:41.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.3.100", "dstaddr": "10.0.182.11", "protocol": "6", "total_count": 5, "bytes": 68768, "packets": 56} +{"index": {"_id": "63"}} +{"start": "2025-07-28T21:56:37.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.55.35", "dstaddr": "207.1.132.190", "protocol": "6", "total_count": 3, "bytes": 70146, "packets": 54} +{"index": {"_id": "64"}} +{"start": "2025-05-25T23:52:18.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.108.29", "dstaddr": "10.0.173.102", "protocol": "6", "total_count": 7, "bytes": 17244, "packets": 18} +{"index": {"_id": "65"}} +{"start": "2025-08-27T01:53:23.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.211.40", "dstaddr": "115.77.29.14", "protocol": "1", "total_count": 15, "bytes": 2464, "packets": 14} +{"index": {"_id": "66"}} +{"start": "2025-05-30T07:32:16.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.150.131", "dstaddr": "25.206.144.45", "protocol": "6", "total_count": 4, "bytes": 163082, "packets": 146} +{"index": {"_id": "67"}} +{"start": "2025-07-10T19:06:03.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.13.162", "dstaddr": "223.41.232.96", "protocol": "6", 
"total_count": 11, "bytes": 135296, "packets": 112} +{"index": {"_id": "68"}} +{"start": "2025-07-19T22:23:50.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.138.68", "dstaddr": "10.0.9.3", "protocol": "17", "total_count": 11, "bytes": 11408, "packets": 31} +{"index": {"_id": "69"}} +{"start": "2025-06-04T00:12:04.858575+0000", "action": "ACCEPT", "srcaddr": "186.141.33.213", "dstaddr": "10.0.73.101", "protocol": "6", "total_count": 10, "bytes": 62988, "packets": 116} +{"index": {"_id": "70"}} +{"start": "2025-06-20T03:43:36.858575+0000", "action": "ACCEPT", "srcaddr": "192.252.93.160", "dstaddr": "10.0.94.154", "protocol": "6", "total_count": 12, "bytes": 111752, "packets": 122} +{"index": {"_id": "71"}} +{"start": "2025-08-20T05:09:17.858575+0000", "action": "ACCEPT", "srcaddr": "195.116.62.18", "dstaddr": "10.0.51.239", "protocol": "17", "total_count": 11, "bytes": 6808, "packets": 46} +{"index": {"_id": "72"}} +{"start": "2025-05-17T21:32:06.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.64.92", "dstaddr": "172.224.215.114", "protocol": "17", "total_count": 10, "bytes": 18496, "packets": 34} +{"index": {"_id": "73"}} +{"start": "2025-08-27T18:37:55.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.146.60", "dstaddr": "10.0.195.114", "protocol": "6", "total_count": 14, "bytes": 120127, "packets": 131} +{"index": {"_id": "74"}} +{"start": "2025-07-22T08:49:53.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.159.35", "dstaddr": "192.198.160.127", "protocol": "47", "total_count": 9, "bytes": 1665, "packets": 9} +{"index": {"_id": "75"}} +{"start": "2025-09-27T17:01:37.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.107.130", "dstaddr": "211.213.211.106", "protocol": "50", "total_count": 13, "bytes": 2774, "packets": 19} +{"index": {"_id": "76"}} +{"start": "2025-05-17T08:23:05.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.166.194", "dstaddr": "10.0.181.254", "protocol": "6", "total_count": 12, "bytes": 83249, "packets": 83} +{"index": {"_id": "77"}} +{"start": "2025-10-14T13:23:11.858575+0000", "action": "REJECT", "srcaddr": "10.0.7.206", "dstaddr": "207.190.86.69", "protocol": "6", "total_count": 1, "bytes": 142, "packets": 2} +{"index": {"_id": "78"}} +{"start": "2025-06-23T05:30:35.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.250.235", "dstaddr": "10.0.100.62", "protocol": "1", "total_count": 11, "bytes": 680, "packets": 4} +{"index": {"_id": "79"}} +{"start": "2025-08-17T21:28:18.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.43.29", "dstaddr": "10.0.46.89", "protocol": "6", "total_count": 2, "bytes": 21030, "packets": 30} +{"index": {"_id": "80"}} +{"start": "2025-08-01T06:29:53.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.78.132", "dstaddr": "10.0.207.33", "protocol": "6", "total_count": 11, "bytes": 76635, "packets": 65} +{"index": {"_id": "81"}} +{"start": "2025-10-13T07:41:19.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.77.97", "dstaddr": "72.196.249.251", "protocol": "6", "total_count": 6, "bytes": 120992, "packets": 152} +{"index": {"_id": "82"}} +{"start": "2025-05-03T16:05:16.858575+0000", "action": "ACCEPT", "srcaddr": "60.248.225.125", "dstaddr": "10.0.91.194", "protocol": "17", "total_count": 4, "bytes": 6480, "packets": 36} +{"index": {"_id": "83"}} +{"start": "2025-06-09T22:21:56.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.107.121", "dstaddr": "222.244.32.139", "protocol": "6", "total_count": 15, "bytes": 51828, "packets": 84} +{"index": {"_id": "84"}} +{"start": "2025-06-25T19:48:03.858575+0000", "action": "ACCEPT", 
"srcaddr": "201.141.77.3", "dstaddr": "10.0.118.54", "protocol": "6", "total_count": 13, "bytes": 103024, "packets": 94} +{"index": {"_id": "85"}} +{"start": "2025-10-07T03:38:12.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.227.35", "dstaddr": "10.0.62.137", "protocol": "6", "total_count": 5, "bytes": 127776, "packets": 176} +{"index": {"_id": "86"}} +{"start": "2025-10-14T11:20:39.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.25.231", "dstaddr": "63.198.81.91", "protocol": "47", "total_count": 2, "bytes": 1830, "packets": 10} +{"index": {"_id": "87"}} +{"start": "2025-08-27T04:01:58.858575+0000", "action": "ACCEPT", "srcaddr": "180.230.60.147", "dstaddr": "10.0.83.183", "protocol": "17", "total_count": 5, "bytes": 2568, "packets": 8} +{"index": {"_id": "88"}} +{"start": "2025-06-05T12:56:41.858575+0000", "action": "ACCEPT", "srcaddr": "210.231.198.95", "dstaddr": "10.0.170.20", "protocol": "6", "total_count": 9, "bytes": 17712, "packets": 12} +{"index": {"_id": "89"}} +{"start": "2025-08-17T11:35:53.858575+0000", "action": "ACCEPT", "srcaddr": "59.123.7.27", "dstaddr": "10.0.75.184", "protocol": "6", "total_count": 2, "bytes": 24534, "packets": 47} +{"index": {"_id": "90"}} +{"start": "2025-05-10T20:51:45.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.171.150", "dstaddr": "10.0.53.75", "protocol": "6", "total_count": 12, "bytes": 33516, "packets": 49} +{"index": {"_id": "91"}} +{"start": "2025-09-23T08:53:16.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.117.121", "dstaddr": "139.201.180.4", "protocol": "6", "total_count": 2, "bytes": 35035, "packets": 55} +{"index": {"_id": "92"}} +{"start": "2025-08-18T22:44:40.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.170.252", "dstaddr": "10.0.224.119", "protocol": "17", "total_count": 10, "bytes": 17182, "packets": 22} +{"index": {"_id": "93"}} +{"start": "2025-08-11T01:38:54.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.186.67", "dstaddr": "21.143.92.15", "protocol": "6", "total_count": 13, "bytes": 115440, "packets": 111} +{"index": {"_id": "94"}} +{"start": "2025-07-18T08:51:24.858575+0000", "action": "ACCEPT", "srcaddr": "121.65.198.154", "dstaddr": "10.0.113.54", "protocol": "6", "total_count": 1, "bytes": 267655, "packets": 199} +{"index": {"_id": "95"}} +{"start": "2025-05-12T14:02:16.858575+0000", "action": "ACCEPT", "srcaddr": "115.27.64.3", "dstaddr": "10.0.159.18", "protocol": "6", "total_count": 11, "bytes": 164891, "packets": 181} +{"index": {"_id": "96"}} +{"start": "2025-10-08T06:03:32.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.115.237", "dstaddr": "161.255.15.161", "protocol": "6", "total_count": 12, "bytes": 60588, "packets": 66} +{"index": {"_id": "97"}} +{"start": "2025-05-08T00:49:40.858575+0000", "action": "ACCEPT", "srcaddr": "10.0.229.44", "dstaddr": "194.69.206.150", "protocol": "6", "total_count": 6, "bytes": 89900, "packets": 62} +{"index": {"_id": "98"}} +{"start": "2025-10-04T08:31:06.858575+0000", "action": "REJECT", "srcaddr": "10.0.151.99", "dstaddr": "10.0.47.132", "protocol": "6", "total_count": 1, "bytes": 118, "packets": 2} +{"index": {"_id": "99"}} +{"start": "2025-06-25T12:26:40.858575+0000", "action": "ACCEPT", "srcaddr": "214.242.197.139", "dstaddr": "10.0.109.2", "protocol": "6", "total_count": 9, "bytes": 58058, "packets": 77} +{"index": {"_id": "100"}} +{"start": "2025-05-29T07:26:27.858575+0000", "action": "ACCEPT", "srcaddr": "135.29.206.112", "dstaddr": "10.0.192.106", "protocol": "6", "total_count": 6, "bytes": 53280, "packets": 96} diff --git 
a/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/waf_logs.json b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/waf_logs.json new file mode 100644 index 00000000000..d24127ae198 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/dashboard/testdata/waf_logs.json @@ -0,0 +1,200 @@ +{"index": {"_id": "1"}} +{"start_time": "2025-05-15T20:43:25.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:784781757088:regional/webacl/APIWAF-lkl/01b29038-23ae-14c5-23ac-007b1120fb93", "action": "ALLOW", "httpRequest.clientIp": "185.114.91.138", "httpRequest.country": "US", "httpRequest.uri": "/download", "httpRequest.httpMethod": "POST", "httpSourceId": "784781757088:zn99vte24b:staging", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 10} +{"index": {"_id": "2"}} +{"start_time": "2025-09-20T13:38:41.162224+0000", "webaclId": "arn:aws:wafv2:eu-central-1:250922725343:regional/webacl/SecurityWAF-ngh/018f30a7-06aa-2324-0f86-006df4b0eddd", "action": "BLOCK", "httpRequest.clientIp": "155.12.221.78", "httpRequest.country": "JP", "httpRequest.uri": "/terms", "httpRequest.httpMethod": "POST", "httpSourceId": "250922725343:06udlnzsuc:v2", "terminatingRuleId": "CustomIPWhitelistRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 2} +{"index": {"_id": "3"}} +{"start_time": "2025-07-09T18:22:57.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:712448542372:regional/webacl/DevWAF-nni/04e6fdf3-14a2-1071-1eac-004f8aa5f94a", "action": "BLOCK", "httpRequest.clientIp": "121.173.165.128", "httpRequest.country": "CA", "httpRequest.uri": "/metrics", "httpRequest.httpMethod": "GET", "httpSourceId": "712448542372:h11d127c1c:prod", "terminatingRuleId": "CustomIPWhitelistRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 5} +{"index": {"_id": "4"}} +{"start_time": "2025-08-06T16:02:55.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:915064614783:regional/webacl/DevWAF-kyf/01107a94-05b9-145d-1ccb-004af1dcf676", "action": "ALLOW", "httpRequest.clientIp": "13.234.156.211", "httpRequest.country": "BR", "httpRequest.uri": "/api/v1/orders", "httpRequest.httpMethod": "GET", "httpSourceId": "915064614783:oudun2xjou:v1", "terminatingRuleId": "XSSProtectionRule", "RuleType": "GROUP", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "5"}} +{"start_time": "2025-08-10T11:54:24.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:782258924067:regional/webacl/TestWAF-jqx/0437727c-103a-0f27-0464-002168ce3f86", "action": "ALLOW", "httpRequest.clientIp": "142.126.11.6", "httpRequest.country": "US", "httpRequest.uri": "/checkout", "httpRequest.httpMethod": "GET", "httpSourceId": "782258924067:8xbvht9icb:dev", "terminatingRuleId": "AWS-AWSManagedRulesLinuxRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "6"}} +{"start_time": "2025-10-17T02:55:37.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-1:761247370460:regional/webacl/SecurityWAF-otj/04f5fe7f-159a-19d0-1874-00a85e96bafc", "action": "ALLOW", "httpRequest.clientIp": "164.20.183.161", "httpRequest.country": "CA", "httpRequest.uri": "/api/v1/products", "httpRequest.httpMethod": "GET", "httpSourceId": "761247370460:lahqyfa58j:test", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "GROUP", "ruleGroupList.ruleId": "", "event_count": 8} +{"index": {"_id": "7"}} +{"start_time": 
"2025-08-31T21:05:36.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:629518466608:regional/webacl/ProductionWAF-jwe/02fcd137-1320-0d11-20a5-00224ff5be96", "action": "ALLOW", "httpRequest.clientIp": "215.157.246.19", "httpRequest.country": "GB", "httpRequest.uri": "/search", "httpRequest.httpMethod": "GET", "httpSourceId": "629518466608:t6kcojiss1:staging", "terminatingRuleId": "XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 3} +{"index": {"_id": "8"}} +{"start_time": "2025-05-26T06:38:11.162224+0000", "webaclId": "arn:aws:wafv2:eu-central-1:140517970760:regional/webacl/SecurityWAF-uho/01418728-1b9c-0c39-2625-00ceed126b78", "action": "ALLOW", "httpRequest.clientIp": "133.126.58.68", "httpRequest.country": "US", "httpRequest.uri": "/api/v2/payments", "httpRequest.httpMethod": "GET", "httpSourceId": "140517970760:2bekfk6qaw:v1", "terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 5} +{"index": {"_id": "9"}} +{"start_time": "2025-06-23T20:19:40.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:954230299361:regional/webacl/SecurityWAF-hpc/04f97731-148d-0d0d-07ce-00abd94e72e7", "action": "ALLOW", "httpRequest.clientIp": "182.128.253.61", "httpRequest.country": "IE", "httpRequest.uri": "/main", "httpRequest.httpMethod": "GET", "httpSourceId": "954230299361:ke46v9a5l1:staging", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 1} +{"index": {"_id": "10"}} +{"start_time": "2025-06-08T01:39:30.162224+0000", "webaclId": "arn:aws:wafv2:ap-northeast-1:923302741632:regional/webacl/StagingWAF-mse/056c0342-1fa5-12fc-152b-00233e3dec77", "action": "BLOCK", "httpRequest.clientIp": "139.101.131.5", "httpRequest.country": "US", "httpRequest.uri": "/product/123", "httpRequest.httpMethod": "HEAD", "httpSourceId": "923302741632:9g5llo4j18:test", "terminatingRuleId": "AWS-AWSManagedRulesLinuxRuleSet", "RuleType": "MANAGED", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": {"_id": "11"}} +{"start_time": "2025-08-29T16:55:09.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:267008286584:regional/webacl/DevWAF-lsg/03874ec4-134a-1bb6-1470-00563cba5c63", "action": "ALLOW", "httpRequest.clientIp": "183.136.182.221", "httpRequest.country": "US", "httpRequest.uri": "/api/v2/search", "httpRequest.httpMethod": "GET", "httpSourceId": "267008286584:xdw6lclhz6:dev", "terminatingRuleId": "SQLInjectionRule", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 3} +{"index": {"_id": "12"}} +{"start_time": "2025-05-17T14:45:33.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-1:568194838510:regional/webacl/SecurityWAF-qgh/00f92e4a-0a94-189a-1088-0065d4de2852", "action": "BLOCK", "httpRequest.clientIp": "37.23.255.49", "httpRequest.country": "DE", "httpRequest.uri": "/products", "httpRequest.httpMethod": "POST", "httpSourceId": "568194838510:n4zxuizmr1:prod", "terminatingRuleId": "CSRFProtectionRule", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 2} +{"index": {"_id": "13"}} +{"start_time": "2025-09-14T02:09:07.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-1:141005917363:regional/webacl/SecurityWAF-wul/0117cd7e-1791-1262-1b53-00ac2fde4651", "action": "ALLOW", "httpRequest.clientIp": "199.131.141.151", 
"httpRequest.country": "US", "httpRequest.uri": "/account", "httpRequest.httpMethod": "GET", "httpSourceId": "141005917363:hddvm2fc40:v1", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 5} +{"index": {"_id": "14"}} +{"start_time": "2025-05-12T15:23:00.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:681312555131:regional/webacl/ProductionWAF-wxe/02690b30-24b2-1bc2-1350-00748131824f", "action": "ALLOW", "httpRequest.clientIp": "208.132.79.232", "httpRequest.country": "IT", "httpRequest.uri": "/test", "httpRequest.httpMethod": "GET", "httpSourceId": "681312555131:lfnhxbkq4x:test", "terminatingRuleId": "CustomIPWhitelistRule", "RuleType": "GROUP", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 2} +{"index": {"_id": "15"}} +{"start_time": "2025-07-24T06:15:42.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-1:230137573327:regional/webacl/SecurityWAF-gic/03764ea0-10d6-07bc-0ff2-00a8c4d77060", "action": "ALLOW", "httpRequest.clientIp": "213.23.184.174", "httpRequest.country": "IN", "httpRequest.uri": "/search", "httpRequest.httpMethod": "GET", "httpSourceId": "230137573327:tcb1lk21lu:v2", "terminatingRuleId": "AWS-AWSManagedRulesUnixRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 5} +{"index": {"_id": "16"}} +{"start_time": "2025-05-08T09:23:26.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:228190768145:regional/webacl/TestWAF-hfw/013e79f2-143e-1ac4-0fb1-0054c4332f8f", "action": "BLOCK", "httpRequest.clientIp": "209.80.140.157", "httpRequest.country": "GB", "httpRequest.uri": "/api/v2/analytics", "httpRequest.httpMethod": "POST", "httpSourceId": "228190768145:vfev5gclho:prod", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "GROUP", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 2} +{"index": {"_id": "17"}} +{"start_time": "2025-06-20T20:37:28.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:485407990968:regional/webacl/TestWAF-gge/0574b5f1-0b2e-2654-0ad9-008dfe9bfd14", "action": "BLOCK", "httpRequest.clientIp": "113.43.5.172", "httpRequest.country": "US", "httpRequest.uri": "/css/style.css", "httpRequest.httpMethod": "DELETE", "httpSourceId": "485407990968:n85036lish:v2", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 2} +{"index": {"_id": "18"}} +{"start_time": "2025-10-29T05:12:46.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:305293746139:regional/webacl/ProductionWAF-xsh/0576994d-09c4-0d5e-1e5b-003746b0920f", "action": "ALLOW", "httpRequest.clientIp": "212.180.56.15", "httpRequest.country": "RU", "httpRequest.uri": "/admin/settings", "httpRequest.httpMethod": "PATCH", "httpSourceId": "305293746139:5wmnd2hfx3:v1", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 3} +{"index": {"_id": "19"}} +{"start_time": "2025-06-14T07:17:28.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:519385246973:regional/webacl/DevWAF-ziy/01658338-2318-20fb-18a7-002ae59b594d", "action": "ALLOW", "httpRequest.clientIp": "111.91.245.99", "httpRequest.country": "AU", "httpRequest.uri": "/products", "httpRequest.httpMethod": "GET", "httpSourceId": "519385246973:whluxz0ukz:test", "terminatingRuleId": "CSRFProtectionRule", "RuleType": "REGULAR", 
"ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 2} +{"index": {"_id": "20"}} +{"start_time": "2025-05-30T17:59:58.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:397861900291:regional/webacl/TestWAF-tff/01638e4a-04a8-1301-18be-00540a2219f9", "action": "ALLOW", "httpRequest.clientIp": "84.119.250.155", "httpRequest.country": "AU", "httpRequest.uri": "/staging", "httpRequest.httpMethod": "GET", "httpSourceId": "397861900291:nj5wg5zxia:v1", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 1} +{"index": {"_id": "21"}} +{"start_time": "2025-10-18T20:11:25.162224+0000", "webaclId": "arn:aws:wafv2:ap-northeast-1:447525657639:regional/webacl/APIWAF-cij/0556f178-1503-1896-1488-0046f7286a29", "action": "ALLOW", "httpRequest.clientIp": "223.49.113.37", "httpRequest.country": "BR", "httpRequest.uri": "/orders", "httpRequest.httpMethod": "GET", "httpSourceId": "447525657639:cm375qom0j:staging", "terminatingRuleId": "AWS-AWSManagedRulesSQLiRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 2} +{"index": {"_id": "22"}} +{"start_time": "2025-06-15T14:53:12.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:242552932461:regional/webacl/TestWAF-egj/0376f324-163e-0d53-20a1-00179ffc5f35", "action": "ALLOW", "httpRequest.clientIp": "192.230.249.48", "httpRequest.country": "US", "httpRequest.uri": "/main", "httpRequest.httpMethod": "GET", "httpSourceId": "242552932461:p3p2qdrsar:test", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 2} +{"index": {"_id": "23"}} +{"start_time": "2025-08-01T03:33:01.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:833792890014:regional/webacl/APIWAF-tfc/0490d2a0-24a5-0c83-204b-008d070c48c2", "action": "BLOCK", "httpRequest.clientIp": "123.122.83.248", "httpRequest.country": "US", "httpRequest.uri": "/admin/settings", "httpRequest.httpMethod": "GET", "httpSourceId": "833792890014:0pvd7pw2sr:staging", "terminatingRuleId": "Default_Action", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 1} +{"index": {"_id": "24"}} +{"start_time": "2025-07-26T01:54:39.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-1:319937443488:regional/webacl/SecurityWAF-vdh/04c39b7f-1d15-06d0-04f8-0069a86df44c", "action": "ALLOW", "httpRequest.clientIp": "124.202.120.170", "httpRequest.country": "US", "httpRequest.uri": "/orders", "httpRequest.httpMethod": "GET", "httpSourceId": "319937443488:bt7bcw1tee:test", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 5} +{"index": {"_id": "25"}} +{"start_time": "2025-05-29T22:12:18.162224+0000", "webaclId": "arn:aws:wafv2:eu-north-1:311175445231:regional/webacl/SecurityWAF-heq/03ebcc61-2228-1b71-133d-00a6d8db69ea", "action": "ALLOW", "httpRequest.clientIp": "222.217.238.161", "httpRequest.country": "CN", "httpRequest.uri": "/ping", "httpRequest.httpMethod": "GET", "httpSourceId": "311175445231:vgu67fdisv:staging", "terminatingRuleId": "AWS-AWSManagedRulesLinuxRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 2} +{"index": {"_id": "26"}} +{"start_time": "2025-09-10T22:52:28.162224+0000", "webaclId": 
"arn:aws:wafv2:ap-southeast-2:843221060015:regional/webacl/DevWAF-lwn/03ef20a7-15e3-1480-0aab-00de097002cf", "action": "BLOCK", "httpRequest.clientIp": "175.155.32.192", "httpRequest.country": "BE", "httpRequest.uri": "/v1", "httpRequest.httpMethod": "GET", "httpSourceId": "843221060015:s6wr62so7d:v1", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 10} +{"index": {"_id": "27"}} +{"start_time": "2025-06-17T00:09:25.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:348882978679:regional/webacl/ProductionWAF-rrs/0230f05b-1bd5-219e-13bf-0029f002522c", "action": "ALLOW", "httpRequest.clientIp": "214.194.86.85", "httpRequest.country": "US", "httpRequest.uri": "/dashboard", "httpRequest.httpMethod": "GET", "httpSourceId": "348882978679:v88ef1kta9:v1", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 8} +{"index": {"_id": "28"}} +{"start_time": "2025-07-20T16:03:13.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:286634171939:regional/webacl/ProductionWAF-htk/00ed22a7-0d93-25bc-0eb5-009370b5fe7b", "action": "ALLOW", "httpRequest.clientIp": "132.42.145.115", "httpRequest.country": "CH", "httpRequest.uri": "/status", "httpRequest.httpMethod": "GET", "httpSourceId": "286634171939:4psayh65ax:staging", "terminatingRuleId": "SQLInjectionRule", "RuleType": "MANAGED", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 3} +{"index": {"_id": "29"}} +{"start_time": "2025-07-10T01:01:00.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:405828461106:regional/webacl/DevWAF-cub/017adad3-23b2-1f36-1ba8-002c2d0e3ecb", "action": "ALLOW", "httpRequest.clientIp": "218.146.73.124", "httpRequest.country": "US", "httpRequest.uri": "/settings", "httpRequest.httpMethod": "GET", "httpSourceId": "405828461106:n1rtnl9kb7:v2", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 1} +{"index": {"_id": "30"}} +{"start_time": "2025-07-20T21:50:06.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:820771920287:regional/webacl/SecurityWAF-ngq/035af8f7-21e7-1948-0d8f-005259285db5", "action": "ALLOW", "httpRequest.clientIp": "75.193.82.89", "httpRequest.country": "CN", "httpRequest.uri": "/index", "httpRequest.httpMethod": "GET", "httpSourceId": "820771920287:rwpybmyan0:prod", "terminatingRuleId": "CSRFProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 2} +{"index": {"_id": "31"}} +{"start_time": "2025-10-04T20:14:01.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:313863925772:regional/webacl/APIWAF-tzz/012b0b07-120d-1105-1311-001de2dc3f62", "action": "ALLOW", "httpRequest.clientIp": "204.136.145.164", "httpRequest.country": "SE", "httpRequest.uri": "/products", "httpRequest.httpMethod": "HEAD", "httpSourceId": "313863925772:8wt8i96aj2:dev", "terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 1} +{"index": {"_id": "32"}} +{"start_time": "2025-05-31T16:31:47.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:323740657623:regional/webacl/StagingWAF-dyd/01087c2a-1625-135b-251a-003d76c9ddfb", "action": "ALLOW", "httpRequest.clientIp": "5.21.92.193", "httpRequest.country": "BR", "httpRequest.uri": "/category/electronics", "httpRequest.httpMethod": "POST", "httpSourceId": "323740657623:fnb68uxox5:staging", 
"terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "MANAGED", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "33"}} +{"start_time": "2025-07-23T14:51:51.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-1:287127415725:regional/webacl/APIWAF-sky/02431439-2040-2138-2336-00956ac1538d", "action": "BLOCK", "httpRequest.clientIp": "25.142.235.245", "httpRequest.country": "DE", "httpRequest.uri": "/staging", "httpRequest.httpMethod": "PATCH", "httpSourceId": "287127415725:8sn1ctc7q8:test", "terminatingRuleId": "CSRFProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 15} +{"index": {"_id": "34"}} +{"start_time": "2025-05-20T20:10:47.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-1:975877598215:regional/webacl/StagingWAF-zjg/04eb590d-20d1-0506-1509-002aeb1b63ac", "action": "ALLOW", "httpRequest.clientIp": "156.166.93.83", "httpRequest.country": "ES", "httpRequest.uri": "/docs", "httpRequest.httpMethod": "GET", "httpSourceId": "975877598215:w97v1gsnhw:staging", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 1} +{"index": {"_id": "35"}} +{"start_time": "2025-07-11T22:31:36.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:876850421348:regional/webacl/ProductionWAF-imd/0098f7ac-0e2e-10d7-18a8-00d1d4c5ae83", "action": "ALLOW", "httpRequest.clientIp": "129.232.230.117", "httpRequest.country": "CA", "httpRequest.uri": "/reset-password", "httpRequest.httpMethod": "DELETE", "httpSourceId": "876850421348:5jm6h4phr9:test", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": {"_id": "36"}} +{"start_time": "2025-08-20T09:09:39.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:830708589901:regional/webacl/ProductionWAF-qxh/01c0f76b-199e-1d9f-24c3-0056432e9bcd", "action": "COUNT", "httpRequest.clientIp": "222.192.232.234", "httpRequest.country": "AT", "httpRequest.uri": "/account", "httpRequest.httpMethod": "GET", "httpSourceId": "830708589901:5fa3q5q0un:prod", "terminatingRuleId": "AWS-AWSManagedRulesSQLiRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "37"}} +{"start_time": "2025-06-12T11:13:23.162224+0000", "webaclId": "arn:aws:wafv2:eu-central-1:842421036527:regional/webacl/StagingWAF-ntr/0253c57b-1ca8-1230-08c3-002ed41c7474", "action": "ALLOW", "httpRequest.clientIp": "18.202.158.90", "httpRequest.country": "US", "httpRequest.uri": "/dashboard", "httpRequest.httpMethod": "OPTIONS", "httpSourceId": "842421036527:ar8rqbq3q9:test", "terminatingRuleId": "XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 1} +{"index": {"_id": "38"}} +{"start_time": "2025-10-03T00:12:25.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:500782617247:regional/webacl/SecurityWAF-rmq/058663fd-0642-0a6d-1ed9-00396db3859d", "action": "ALLOW", "httpRequest.clientIp": "163.139.56.124", "httpRequest.country": "GB", "httpRequest.uri": "/payment", "httpRequest.httpMethod": "POST", "httpSourceId": "500782617247:t7i7t9i6e7:v1", "terminatingRuleId": "AWS-AWSManagedRulesLinuxRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 8} +{"index": {"_id": "39"}} +{"start_time": "2025-10-23T11:54:39.162224+0000", "webaclId": 
"arn:aws:wafv2:us-west-2:723222443533:regional/webacl/SecurityWAF-nzm/02c03747-1339-0e67-22a8-00795c6a2f3e", "action": "ALLOW", "httpRequest.clientIp": "216.23.97.136", "httpRequest.country": "US", "httpRequest.uri": "/forgot-password", "httpRequest.httpMethod": "POST", "httpSourceId": "723222443533:aiv8gm1whw:v2", "terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 1} +{"index": {"_id": "40"}} +{"start_time": "2025-10-02T20:09:31.162224+0000", "webaclId": "arn:aws:wafv2:eu-north-1:617268136311:regional/webacl/SecurityWAF-egf/0552db98-08d5-1b27-1a16-0032ec9d28ac", "action": "ALLOW", "httpRequest.clientIp": "182.102.190.220", "httpRequest.country": "BE", "httpRequest.uri": "/health", "httpRequest.httpMethod": "GET", "httpSourceId": "617268136311:v4ruj26ill:v2", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 10} +{"index": {"_id": "41"}} +{"start_time": "2025-10-04T03:42:13.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:642484971519:regional/webacl/SecurityWAF-wwk/03a2fa5d-221d-0be0-25a9-003199da8b07", "action": "ALLOW", "httpRequest.clientIp": "208.233.97.74", "httpRequest.country": "RU", "httpRequest.uri": "/docs", "httpRequest.httpMethod": "POST", "httpSourceId": "642484971519:12ta5r54jb:dev", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 1} +{"index": {"_id": "42"}} +{"start_time": "2025-05-16T15:48:10.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:485991240578:regional/webacl/DevWAF-mnz/0170881a-1c4c-1093-0d48-003f7d1ada59", "action": "ALLOW", "httpRequest.clientIp": "133.230.243.44", "httpRequest.country": "SG", "httpRequest.uri": "/profile", "httpRequest.httpMethod": "POST", "httpSourceId": "485991240578:ape881h5oz:test", "terminatingRuleId": "XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 1} +{"index": {"_id": "43"}} +{"start_time": "2025-07-05T07:13:26.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:145221630661:regional/webacl/DevWAF-lzc/015b1579-0b4f-1f01-1df5-002bb48bbecf", "action": "ALLOW", "httpRequest.clientIp": "158.175.26.103", "httpRequest.country": "ZA", "httpRequest.uri": "/beta", "httpRequest.httpMethod": "GET", "httpSourceId": "145221630661:qu0ogoqzgy:prod", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 3} +{"index": {"_id": "44"}} +{"start_time": "2025-10-27T19:18:51.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:649798759553:regional/webacl/TestWAF-nnz/010f0259-08e9-0860-1ef7-0033000e85b5", "action": "ALLOW", "httpRequest.clientIp": "193.151.32.35", "httpRequest.country": "DE", "httpRequest.uri": "/account", "httpRequest.httpMethod": "GET", "httpSourceId": "649798759553:98k5eo7n42:v1", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 3} +{"index": {"_id": "45"}} +{"start_time": "2025-08-06T15:48:14.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:948998259526:regional/webacl/ProductionWAF-xjh/01c9a834-203f-24f0-0eb0-003ba3daff1b", "action": "ALLOW", "httpRequest.clientIp": "45.22.127.116", "httpRequest.country": "RU", "httpRequest.uri": "/api/v1/products", "httpRequest.httpMethod": "GET", 
"httpSourceId": "948998259526:u5z7atre6j:dev", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 8} +{"index": {"_id": "46"}} +{"start_time": "2025-10-04T16:09:46.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-1:193447574359:regional/webacl/DevWAF-qei/0362b65c-15de-04f5-209b-001e0581b739", "action": "ALLOW", "httpRequest.clientIp": "169.97.81.90", "httpRequest.country": "US", "httpRequest.uri": "/checkout", "httpRequest.httpMethod": "GET", "httpSourceId": "193447574359:vmx83krlxx:v1", "terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 8} +{"index": {"_id": "47"}} +{"start_time": "2025-09-08T04:05:51.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:380668042135:regional/webacl/APIWAF-svw/03b5e371-1ba1-1c47-14a7-003f9ec7fa1e", "action": "ALLOW", "httpRequest.clientIp": "98.242.41.109", "httpRequest.country": "FR", "httpRequest.uri": "/v2", "httpRequest.httpMethod": "GET", "httpSourceId": "380668042135:jiwlqpjsn9:dev", "terminatingRuleId": "Default_Action", "RuleType": "MANAGED", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "48"}} +{"start_time": "2025-09-08T21:15:43.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:242024843132:regional/webacl/StagingWAF-pkq/03a6f119-127d-204b-06b7-001d0fbe8ff0", "action": "ALLOW", "httpRequest.clientIp": "200.36.221.181", "httpRequest.country": "CL", "httpRequest.uri": "/api/v2/users", "httpRequest.httpMethod": "POST", "httpSourceId": "242024843132:0yame5bp5u:prod", "terminatingRuleId": "XSSProtectionRule", "RuleType": "GROUP", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 15} +{"index": {"_id": "49"}} +{"start_time": "2025-06-08T05:21:11.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:665754759248:regional/webacl/SecurityWAF-zfd/022362d9-2340-10c4-176f-009f5bb8b638", "action": "ALLOW", "httpRequest.clientIp": "45.141.140.113", "httpRequest.country": "DE", "httpRequest.uri": "/account", "httpRequest.httpMethod": "HEAD", "httpSourceId": "665754759248:qimygw55np:test", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 1} +{"index": {"_id": "50"}} +{"start_time": "2025-10-15T01:36:00.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:730147088910:regional/webacl/SecurityWAF-wzi/039f93e9-223c-073e-0d31-0034bc8cba89", "action": "ALLOW", "httpRequest.clientIp": "134.87.77.39", "httpRequest.country": "CA", "httpRequest.uri": "/api/v2/search", "httpRequest.httpMethod": "GET", "httpSourceId": "730147088910:08s411v00r:test", "terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 1} +{"index": {"_id": "51"}} +{"start_time": "2025-07-27T21:41:46.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:982634220848:regional/webacl/TestWAF-qzt/02e04f35-073d-0b99-13f2-00bf01c505b3", "action": "BLOCK", "httpRequest.clientIp": "40.56.111.84", "httpRequest.country": "US", "httpRequest.uri": "/download", "httpRequest.httpMethod": "GET", "httpSourceId": "982634220848:upv8lxwf07:test", "terminatingRuleId": "CSRFProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": {"_id": "52"}} +{"start_time": "2025-08-12T06:51:30.162224+0000", "webaclId": 
"arn:aws:wafv2:ap-southeast-1:202471939738:regional/webacl/APIWAF-urf/0521bf75-0785-09c6-0cfb-00bb35bc5851", "action": "BLOCK", "httpRequest.clientIp": "95.223.247.216", "httpRequest.country": "JP", "httpRequest.uri": "/register", "httpRequest.httpMethod": "GET", "httpSourceId": "202471939738:0hknekvvs7:prod", "terminatingRuleId": "AWS-AWSManagedRulesUnixRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 3} +{"index": {"_id": "53"}} +{"start_time": "2025-10-10T01:48:29.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:764651792910:regional/webacl/TestWAF-vdi/02c4f943-0582-0a32-07ba-0062e4bd88bc", "action": "ALLOW", "httpRequest.clientIp": "147.165.184.123", "httpRequest.country": "US", "httpRequest.uri": "/product/123", "httpRequest.httpMethod": "POST", "httpSourceId": "764651792910:90ixgqxr46:staging", "terminatingRuleId": "Default_Action", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "54"}} +{"start_time": "2025-10-29T10:12:22.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:543944840289:regional/webacl/ProductionWAF-tnk/02c0c78c-07cc-1b80-1cb3-003da6e47951", "action": "ALLOW", "httpRequest.clientIp": "154.178.101.29", "httpRequest.country": "BR", "httpRequest.uri": "/images", "httpRequest.httpMethod": "CONNECT", "httpSourceId": "543944840289:q8f3fs9wa7:staging", "terminatingRuleId": "CustomGeoBlockRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 2} +{"index": {"_id": "55"}} +{"start_time": "2025-06-08T17:46:35.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:806561232496:regional/webacl/ProductionWAF-vfp/03b3dcf4-2043-06da-06e3-008053fd8238", "action": "ALLOW", "httpRequest.clientIp": "163.102.19.219", "httpRequest.country": "IE", "httpRequest.uri": "/api/v2/payments", "httpRequest.httpMethod": "POST", "httpSourceId": "806561232496:5kx09bomae:v1", "terminatingRuleId": "Default_Action", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 1} +{"index": {"_id": "56"}} +{"start_time": "2025-09-13T00:13:50.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-1:274301826483:regional/webacl/TestWAF-yjq/0472619c-0919-0fbe-0dca-009b117627bf", "action": "ALLOW", "httpRequest.clientIp": "184.12.90.229", "httpRequest.country": "NL", "httpRequest.uri": "/status", "httpRequest.httpMethod": "GET", "httpSourceId": "274301826483:w0z4c3pmk2:dev", "terminatingRuleId": "CustomGeoBlockRule", "RuleType": "GROUP", "ruleGroupList.ruleId": "", "event_count": 5} +{"index": {"_id": "57"}} +{"start_time": "2025-07-03T03:49:34.162224+0000", "webaclId": "arn:aws:wafv2:eu-central-1:970191749475:regional/webacl/APIWAF-oqy/02c3b7aa-1565-12d4-0b8b-0022d7a6f387", "action": "ALLOW", "httpRequest.clientIp": "203.164.189.116", "httpRequest.country": "US", "httpRequest.uri": "/docs", "httpRequest.httpMethod": "GET", "httpSourceId": "970191749475:z1k5s4yisp:staging", "terminatingRuleId": "CustomGeoBlockRule", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 8} +{"index": {"_id": "58"}} +{"start_time": "2025-10-07T18:47:19.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:858456905009:regional/webacl/TestWAF-ovi/04a6440e-1319-11ac-0e0b-0078764eccf2", "action": "ALLOW", "httpRequest.clientIp": "199.222.214.187", "httpRequest.country": "US", "httpRequest.uri": "/api/v2/search", "httpRequest.httpMethod": "GET", "httpSourceId": "858456905009:fem8yw0l3k:v2", "terminatingRuleId": 
"AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": {"_id": "59"}} +{"start_time": "2025-08-01T17:04:41.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:439975148447:regional/webacl/DevWAF-gbm/0310aa31-12c4-1ecf-1de9-0069f0cbe713", "action": "ALLOW", "httpRequest.clientIp": "165.201.4.103", "httpRequest.country": "GB", "httpRequest.uri": "/billing", "httpRequest.httpMethod": "GET", "httpSourceId": "439975148447:zxqkkioqbm:prod", "terminatingRuleId": "Default_Action", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "60"}} +{"start_time": "2025-08-28T23:59:00.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:650560725646:regional/webacl/APIWAF-kdq/04e3afc6-1f5e-2557-08c2-001896e0b490", "action": "ALLOW", "httpRequest.clientIp": "189.221.116.192", "httpRequest.country": "MX", "httpRequest.uri": "/ping", "httpRequest.httpMethod": "POST", "httpSourceId": "650560725646:z93kow1evi:staging", "terminatingRuleId": "XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 2} +{"index": {"_id": "61"}} +{"start_time": "2025-06-06T12:46:18.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:111094334819:regional/webacl/SecurityWAF-fnj/0353db70-0a62-0b46-0df9-00407ea10c85", "action": "ALLOW", "httpRequest.clientIp": "193.94.103.199", "httpRequest.country": "US", "httpRequest.uri": "/api/v2/search", "httpRequest.httpMethod": "GET", "httpSourceId": "111094334819:xrt1wykug7:staging", "terminatingRuleId": "AWS-AWSManagedRulesSQLiRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 1} +{"index": {"_id": "62"}} +{"start_time": "2025-05-09T01:11:18.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:306790343292:regional/webacl/DevWAF-sew/039b3f57-1cab-0824-15db-00ad34881e9b", "action": "BLOCK", "httpRequest.clientIp": "178.122.230.147", "httpRequest.country": "US", "httpRequest.uri": "/api/v2/users", "httpRequest.httpMethod": "GET", "httpSourceId": "306790343292:4wbgvdinia:prod", "terminatingRuleId": "CustomIPWhitelistRule", "RuleType": "GROUP", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 5} +{"index": {"_id": "63"}} +{"start_time": "2025-10-17T02:55:51.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:402518329742:regional/webacl/SecurityWAF-yfl/033f2a33-1325-0978-0974-00a52873fc03", "action": "BLOCK", "httpRequest.clientIp": "39.172.246.154", "httpRequest.country": "FI", "httpRequest.uri": "/home", "httpRequest.httpMethod": "DELETE", "httpSourceId": "402518329742:wbfkg7bmxm:dev", "terminatingRuleId": "AWS-AWSManagedRulesLinuxRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 3} +{"index": {"_id": "64"}} +{"start_time": "2025-10-29T06:07:01.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:568627903423:regional/webacl/APIWAF-anw/00ec82b6-06ce-153d-1b61-00ddf3408ce1", "action": "ALLOW", "httpRequest.clientIp": "216.101.44.111", "httpRequest.country": "IN", "httpRequest.uri": "/metrics", "httpRequest.httpMethod": "POST", "httpSourceId": "568627903423:r6ubvo3m9b:v2", "terminatingRuleId": "CustomIPWhitelistRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 5} +{"index": {"_id": "65"}} +{"start_time": "2025-09-14T22:26:53.162224+0000", "webaclId": 
"arn:aws:wafv2:ap-southeast-1:455255499022:regional/webacl/APIWAF-lui/01340c69-1215-2625-0f87-00cf186cef2d", "action": "ALLOW", "httpRequest.clientIp": "32.147.248.87", "httpRequest.country": "BR", "httpRequest.uri": "/home", "httpRequest.httpMethod": "GET", "httpSourceId": "455255499022:kk0a43ff7q:v1", "terminatingRuleId": "AWS-AWSManagedRulesSQLiRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 3} +{"index": {"_id": "66"}} +{"start_time": "2025-05-07T08:26:43.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:749258896658:regional/webacl/APIWAF-hun/02a990ec-18cc-071b-1599-007f8959341d", "action": "BLOCK", "httpRequest.clientIp": "139.15.200.185", "httpRequest.country": "US", "httpRequest.uri": "/billing", "httpRequest.httpMethod": "GET", "httpSourceId": "749258896658:0k4s6khbwn:v2", "terminatingRuleId": "SQLInjectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 1} +{"index": {"_id": "67"}} +{"start_time": "2025-05-27T04:41:18.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:731295791113:regional/webacl/SecurityWAF-aok/05dbda8c-0cf6-1411-0585-00577b348a61", "action": "ALLOW", "httpRequest.clientIp": "120.195.76.131", "httpRequest.country": "GB", "httpRequest.uri": "/files", "httpRequest.httpMethod": "DELETE", "httpSourceId": "731295791113:9webeox3nn:staging", "terminatingRuleId": "XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 10} +{"index": {"_id": "68"}} +{"start_time": "2025-09-01T14:31:40.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:959384828544:regional/webacl/TestWAF-kma/00e0dd1d-228d-1a07-2173-005ecac96792", "action": "ALLOW", "httpRequest.clientIp": "181.41.199.91", "httpRequest.country": "US", "httpRequest.uri": "/admin/dashboard", "httpRequest.httpMethod": "POST", "httpSourceId": "959384828544:c9t85ngog2:v1", "terminatingRuleId": "CustomIPWhitelistRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 2} +{"index": {"_id": "69"}} +{"start_time": "2025-10-31T02:06:08.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:530580693428:regional/webacl/DevWAF-cmh/04b7b401-1f1d-14d0-151a-00460edbf1bf", "action": "ALLOW", "httpRequest.clientIp": "111.90.239.133", "httpRequest.country": "US", "httpRequest.uri": "/test", "httpRequest.httpMethod": "GET", "httpSourceId": "530580693428:orj9gga7jo:staging", "terminatingRuleId": "CustomIPWhitelistRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 5} +{"index": {"_id": "70"}} +{"start_time": "2025-05-14T07:23:02.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:347042375091:regional/webacl/ProductionWAF-izy/03501c2f-20b4-26e3-0c36-001a04b69fd7", "action": "BLOCK", "httpRequest.clientIp": "113.219.64.93", "httpRequest.country": "US", "httpRequest.uri": "/health", "httpRequest.httpMethod": "GET", "httpSourceId": "347042375091:x7z7uoxgih:staging", "terminatingRuleId": "CustomGeoBlockRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 3} +{"index": {"_id": "71"}} +{"start_time": "2025-09-10T23:57:17.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:594392196294:regional/webacl/TestWAF-xbv/015295ed-0638-20b0-24a6-00649d86ac13", "action": "ALLOW", "httpRequest.clientIp": "18.115.57.49", "httpRequest.country": "BE", "httpRequest.uri": "/api/v2/search", "httpRequest.httpMethod": "POST", "httpSourceId": "594392196294:fvpspaz03y:dev", "terminatingRuleId": 
"XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 2} +{"index": {"_id": "72"}} +{"start_time": "2025-07-09T14:33:41.162224+0000", "webaclId": "arn:aws:wafv2:eu-north-1:416663120418:regional/webacl/DevWAF-vja/01dd2fcb-1698-211f-19e0-002a1a2c357f", "action": "ALLOW", "httpRequest.clientIp": "198.30.238.158", "httpRequest.country": "RU", "httpRequest.uri": "/css/style.css", "httpRequest.httpMethod": "GET", "httpSourceId": "416663120418:5myq2sajcc:v1", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "GROUP", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 1} +{"index": {"_id": "73"}} +{"start_time": "2025-10-17T09:20:47.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:517854724418:regional/webacl/DevWAF-fnu/01e6fdf7-1462-12c8-08a3-00203f2932ed", "action": "ALLOW", "httpRequest.clientIp": "111.138.30.194", "httpRequest.country": "US", "httpRequest.uri": "/help", "httpRequest.httpMethod": "GET", "httpSourceId": "517854724418:edw7p411wv:v2", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 1} +{"index": {"_id": "74"}} +{"start_time": "2025-09-08T18:29:47.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:258525442777:regional/webacl/StagingWAF-hfd/0381bddb-11a4-0a0c-1a39-0031121cd75d", "action": "ALLOW", "httpRequest.clientIp": "213.71.118.151", "httpRequest.country": "CA", "httpRequest.uri": "/beta", "httpRequest.httpMethod": "GET", "httpSourceId": "258525442777:4wilt0utcp:staging", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 8} +{"index": {"_id": "75"}} +{"start_time": "2025-07-10T18:30:35.162224+0000", "webaclId": "arn:aws:wafv2:ap-northeast-1:619270372305:regional/webacl/SecurityWAF-noy/00f7c6a8-0fa7-058d-103b-0056775fe1c1", "action": "BLOCK", "httpRequest.clientIp": "89.69.63.107", "httpRequest.country": "JP", "httpRequest.uri": "/contact", "httpRequest.httpMethod": "GET", "httpSourceId": "619270372305:17xydndcjk:v1", "terminatingRuleId": "Default_Action", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 5} +{"index": {"_id": "76"}} +{"start_time": "2025-07-20T05:36:10.162224+0000", "webaclId": "arn:aws:wafv2:us-west-1:450587685162:regional/webacl/DevWAF-rxs/04f2b672-22a6-1179-08e3-002b06fc2b7f", "action": "ALLOW", "httpRequest.clientIp": "50.64.193.208", "httpRequest.country": "DE", "httpRequest.uri": "/about", "httpRequest.httpMethod": "GET", "httpSourceId": "450587685162:5ttceerz37:prod", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 1} +{"index": {"_id": "77"}} +{"start_time": "2025-05-09T23:22:12.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-1:886593622606:regional/webacl/ProductionWAF-qat/01bbd759-0ffd-1160-113a-00a67c514fe6", "action": "ALLOW", "httpRequest.clientIp": "177.135.241.158", "httpRequest.country": "US", "httpRequest.uri": "/css/style.css", "httpRequest.httpMethod": "GET", "httpSourceId": "886593622606:9xnpc58u4f:v2", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 1} +{"index": {"_id": "78"}} +{"start_time": "2025-10-04T06:08:03.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:837955482738:regional/webacl/APIWAF-llv/0141a030-1570-0c0e-19ee-007d2c72035d", "action": 
"ALLOW", "httpRequest.clientIp": "41.23.236.14", "httpRequest.country": "US", "httpRequest.uri": "/admin", "httpRequest.httpMethod": "POST", "httpSourceId": "837955482738:968xop6lu3:v2", "terminatingRuleId": "Default_Action", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": {"_id": "79"}} +{"start_time": "2025-07-18T15:26:05.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:534614123786:regional/webacl/ProductionWAF-gwh/00d580a1-04d6-0d37-0754-00c07613d4e8", "action": "ALLOW", "httpRequest.clientIp": "179.89.222.88", "httpRequest.country": "GB", "httpRequest.uri": "/help", "httpRequest.httpMethod": "GET", "httpSourceId": "534614123786:f4j5zymj2d:prod", "terminatingRuleId": "Default_Action", "RuleType": "GROUP", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 1} +{"index": {"_id": "80"}} +{"start_time": "2025-08-06T13:08:48.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:960014064038:regional/webacl/TestWAF-alc/0319d44a-05d2-1473-1368-002ce5c05e11", "action": "BLOCK", "httpRequest.clientIp": "39.85.90.232", "httpRequest.country": "MX", "httpRequest.uri": "/api/v2/users", "httpRequest.httpMethod": "DELETE", "httpSourceId": "960014064038:1rtydo8nde:test", "terminatingRuleId": "Default_Action", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 2} +{"index": {"_id": "81"}} +{"start_time": "2025-06-30T13:14:22.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-2:148466823899:regional/webacl/SecurityWAF-zob/044de0b3-188d-23e5-2091-0059d7a23992", "action": "ALLOW", "httpRequest.clientIp": "166.222.235.96", "httpRequest.country": "US", "httpRequest.uri": "/uploads", "httpRequest.httpMethod": "GET", "httpSourceId": "148466823899:xd4jad2arp:v2", "terminatingRuleId": "Default_Action", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 1} +{"index": {"_id": "82"}} +{"start_time": "2025-08-04T10:28:34.162224+0000", "webaclId": "arn:aws:wafv2:ap-south-1:135540969713:regional/webacl/StagingWAF-iep/02387aad-18a5-12bf-176d-0028d9125c41", "action": "ALLOW", "httpRequest.clientIp": "67.226.65.57", "httpRequest.country": "RU", "httpRequest.uri": "/test", "httpRequest.httpMethod": "POST", "httpSourceId": "135540969713:d4fi2uldrh:dev", "terminatingRuleId": "SQLInjectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 10} +{"index": {"_id": "83"}} +{"start_time": "2025-08-30T22:42:57.162224+0000", "webaclId": "arn:aws:wafv2:ap-northeast-1:498681864694:regional/webacl/StagingWAF-gwm/04a0faaa-257c-0e86-23de-008439279608", "action": "ALLOW", "httpRequest.clientIp": "217.174.24.163", "httpRequest.country": "US", "httpRequest.uri": "/dev", "httpRequest.httpMethod": "GET", "httpSourceId": "498681864694:rw5rixvig2:v2", "terminatingRuleId": "AWS-AWSManagedRulesSQLiRuleSet", "RuleType": "GROUP", "ruleGroupList.ruleId": "AWSManagedRulesLinuxRuleSet", "event_count": 1} +{"index": {"_id": "84"}} +{"start_time": "2025-07-16T19:07:17.162224+0000", "webaclId": "arn:aws:wafv2:eu-north-1:114388501815:regional/webacl/SecurityWAF-taw/0159f2eb-255b-1351-1a98-009bb9161148", "action": "ALLOW", "httpRequest.clientIp": "194.179.52.105", "httpRequest.country": "PT", "httpRequest.uri": "/contact", "httpRequest.httpMethod": "GET", "httpSourceId": "114388501815:eo9l6j7ojp:dev", "terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": 
{"_id": "85"}} +{"start_time": "2025-06-15T15:30:23.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:698021885330:regional/webacl/SecurityWAF-giz/04fa0fa1-1dc7-2111-237e-00cf3191d72c", "action": "ALLOW", "httpRequest.clientIp": "179.54.56.192", "httpRequest.country": "SE", "httpRequest.uri": "/css/style.css", "httpRequest.httpMethod": "GET", "httpSourceId": "698021885330:7jmk2qjitf:test", "terminatingRuleId": "AWS-AWSManagedRulesCommonRuleSet", "RuleType": "MANAGED", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": {"_id": "86"}} +{"start_time": "2025-06-14T04:02:32.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:754882139451:regional/webacl/SecurityWAF-uvu/02151b2a-1e14-1922-0d69-00d2b2cf501b", "action": "ALLOW", "httpRequest.clientIp": "212.183.48.35", "httpRequest.country": "US", "httpRequest.uri": "/faq", "httpRequest.httpMethod": "GET", "httpSourceId": "754882139451:7vc82au4sp:staging", "terminatingRuleId": "Default_Action", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 2} +{"index": {"_id": "87"}} +{"start_time": "2025-05-20T12:47:34.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-2:787106827369:regional/webacl/StagingWAF-oye/01fbfc49-0e59-0542-07ed-00d16565e9f0", "action": "ALLOW", "httpRequest.clientIp": "129.235.123.41", "httpRequest.country": "GB", "httpRequest.uri": "/billing", "httpRequest.httpMethod": "GET", "httpSourceId": "787106827369:xugceswrh4:dev", "terminatingRuleId": "SQLInjectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "", "event_count": 3} +{"index": {"_id": "88"}} +{"start_time": "2025-05-05T19:53:54.162224+0000", "webaclId": "arn:aws:wafv2:ap-northeast-1:861882388437:regional/webacl/ProductionWAF-ojv/02c77197-0fa9-22a2-16b7-0031436e9607", "action": "ALLOW", "httpRequest.clientIp": "144.107.112.65", "httpRequest.country": "IN", "httpRequest.uri": "/products", "httpRequest.httpMethod": "POST", "httpSourceId": "861882388437:57q45ln5ye:staging", "terminatingRuleId": "XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 5} +{"index": {"_id": "89"}} +{"start_time": "2025-05-08T19:18:23.162224+0000", "webaclId": "arn:aws:wafv2:ap-southeast-1:961436783084:regional/webacl/StagingWAF-xil/00f83c58-1817-1ecc-196b-009f844148fc", "action": "ALLOW", "httpRequest.clientIp": "126.188.154.227", "httpRequest.country": "GB", "httpRequest.uri": "/download", "httpRequest.httpMethod": "GET", "httpSourceId": "961436783084:5gg1f85c1f:v2", "terminatingRuleId": "AWS-AWSManagedRulesSQLiRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 8} +{"index": {"_id": "90"}} +{"start_time": "2025-05-21T22:18:48.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:778028775114:regional/webacl/TestWAF-evq/029a62b4-222b-053f-0db7-00ab0d1b6c9f", "action": "ALLOW", "httpRequest.clientIp": "17.118.229.96", "httpRequest.country": "CA", "httpRequest.uri": "/admin/dashboard", "httpRequest.httpMethod": "GET", "httpSourceId": "778028775114:p98vv7p55s:dev", "terminatingRuleId": "CustomGeoBlockRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 1} +{"index": {"_id": "91"}} +{"start_time": "2025-06-01T11:59:31.162224+0000", "webaclId": "arn:aws:wafv2:us-west-2:699303008300:regional/webacl/ProductionWAF-ayy/0550534e-10c2-1c7d-0a8e-00b596e4636e", "action": "ALLOW", "httpRequest.clientIp": "222.98.173.234", "httpRequest.country": "US", "httpRequest.uri": "/account", 
"httpRequest.httpMethod": "GET", "httpSourceId": "699303008300:41783ov98z:staging", "terminatingRuleId": "AWS-AWSManagedRulesLinuxRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 2} +{"index": {"_id": "92"}} +{"start_time": "2025-07-27T15:07:41.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-1:619554842546:regional/webacl/StagingWAF-hqu/012b79fd-1902-0dd1-0e76-0048aff6a099", "action": "ALLOW", "httpRequest.clientIp": "210.62.170.38", "httpRequest.country": "US", "httpRequest.uri": "/about", "httpRequest.httpMethod": "PATCH", "httpSourceId": "619554842546:gajel443mw:test", "terminatingRuleId": "AWS-AWSManagedRulesKnownBadInputsRuleSet", "RuleType": "REGULAR", "ruleGroupList.ruleId": "CustomSecurityRules", "event_count": 10} +{"index": {"_id": "93"}} +{"start_time": "2025-06-25T09:39:39.162224+0000", "webaclId": "arn:aws:wafv2:eu-north-1:957160307475:regional/webacl/ProductionWAF-wiq/04419f15-069b-1995-0d2c-00d6e0459bec", "action": "BLOCK", "httpRequest.clientIp": "85.27.13.121", "httpRequest.country": "DE", "httpRequest.uri": "/cart", "httpRequest.httpMethod": "GET", "httpSourceId": "957160307475:879pk98gqh:test", "terminatingRuleId": "CustomRateLimitRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesSQLiRuleSet", "event_count": 1} +{"index": {"_id": "94"}} +{"start_time": "2025-07-05T07:04:00.162224+0000", "webaclId": "arn:aws:wafv2:us-east-1:308194230030:regional/webacl/StagingWAF-ewj/046b79fd-19dd-127f-1b9a-00b700624ed6", "action": "ALLOW", "httpRequest.clientIp": "16.40.17.72", "httpRequest.country": "BR", "httpRequest.uri": "/order/456", "httpRequest.httpMethod": "GET", "httpSourceId": "308194230030:x5pd0st5rf:dev", "terminatingRuleId": "XSSProtectionRule", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesAmazonIpReputationList", "event_count": 3} +{"index": {"_id": "95"}} +{"start_time": "2025-08-29T05:06:00.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:798974725284:regional/webacl/DevWAF-mka/01891864-239f-14e0-1ef6-0047d39f4465", "action": "BLOCK", "httpRequest.clientIp": "14.208.28.47", "httpRequest.country": "BR", "httpRequest.uri": "/api/v1/auth", "httpRequest.httpMethod": "GET", "httpSourceId": "798974725284:34tgasoyc7:prod", "terminatingRuleId": "AWS-AWSManagedRulesLinuxRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 2} +{"index": {"_id": "96"}} +{"start_time": "2025-09-01T08:18:36.162224+0000", "webaclId": "arn:aws:wafv2:eu-central-1:868007491231:regional/webacl/DevWAF-ctt/01a94aef-2491-1a1e-081d-0046f9e1174c", "action": "BLOCK", "httpRequest.clientIp": "53.217.193.212", "httpRequest.country": "DE", "httpRequest.uri": "/js/app.js", "httpRequest.httpMethod": "POST", "httpSourceId": "868007491231:tcqjdx72mw:v2", "terminatingRuleId": "XSSProtectionRule", "RuleType": "MANAGED", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 1} +{"index": {"_id": "97"}} +{"start_time": "2025-08-31T10:00:11.162224+0000", "webaclId": "arn:aws:wafv2:ap-northeast-1:767003732356:regional/webacl/DevWAF-tih/04d822d6-1021-04af-09aa-00ba764d8ef5", "action": "ALLOW", "httpRequest.clientIp": "218.184.9.14", "httpRequest.country": "ES", "httpRequest.uri": "/uploads", "httpRequest.httpMethod": "GET", "httpSourceId": "767003732356:g3dwk4udly:v1", "terminatingRuleId": "AWS-AWSManagedRulesUnixRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "CustomRateLimitRules", "event_count": 3} +{"index": {"_id": "98"}} 
+{"start_time": "2025-10-14T17:32:31.162224+0000", "webaclId": "arn:aws:wafv2:eu-west-1:687188340765:regional/webacl/StagingWAF-fty/02c2fb3d-1e42-1ab5-1c0f-0055c4c50a1f", "action": "BLOCK", "httpRequest.clientIp": "206.206.144.70", "httpRequest.country": "SG", "httpRequest.uri": "/profile", "httpRequest.httpMethod": "PATCH", "httpSourceId": "687188340765:qsnru0r3en:dev", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "REGULAR", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 15} +{"index": {"_id": "99"}} +{"start_time": "2025-07-27T19:43:23.162224+0000", "webaclId": "arn:aws:wafv2:eu-north-1:784929861488:regional/webacl/DevWAF-viy/0207a250-23b4-196a-2097-00a34831d4fe", "action": "ALLOW", "httpRequest.clientIp": "134.153.117.16", "httpRequest.country": "CN", "httpRequest.uri": "/images", "httpRequest.httpMethod": "GET", "httpSourceId": "784929861488:hfwssdolg7:dev", "terminatingRuleId": "AWS-AWSManagedRulesAmazonIpReputationList", "RuleType": "GROUP", "ruleGroupList.ruleId": "AWSManagedRulesKnownBadInputsRuleSet", "event_count": 2} +{"index": {"_id": "100"}} +{"start_time": "2025-09-07T23:31:31.162224+0000", "webaclId": "arn:aws:wafv2:us-east-2:408972772269:regional/webacl/ProductionWAF-onn/01616761-15e0-13bc-2288-0035113381ef", "action": "ALLOW", "httpRequest.clientIp": "222.60.9.140", "httpRequest.country": "CH", "httpRequest.uri": "/files", "httpRequest.httpMethod": "GET", "httpSourceId": "408972772269:yp53tdkcv4:test", "terminatingRuleId": "AWS-AWSManagedRulesSQLiRuleSet", "RuleType": "RATE_BASED", "ruleGroupList.ruleId": "AWSManagedRulesCommonRuleSet", "event_count": 2} From 1da0ff466aa44a03722e5c3adaeb650d73795a9e Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Mon, 10 Nov 2025 15:04:11 -0800 Subject: [PATCH 38/99] [Maintenance] Enforce PR label of 'bugFix' instead of 'bug' (#4773) --- .github/workflows/enforce-labels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/enforce-labels.yml b/.github/workflows/enforce-labels.yml index 9f77826c706..6f1defc0149 100644 --- a/.github/workflows/enforce-labels.yml +++ b/.github/workflows/enforce-labels.yml @@ -9,5 +9,5 @@ jobs: steps: - uses: yogevbd/enforce-label-action@2.1.0 with: - REQUIRED_LABELS_ANY: "breaking,feature,enhancement,bug,infrastructure,dependencies,documentation,maintenance,skip-changelog,testing,security fix" - REQUIRED_LABELS_ANY_DESCRIPTION: "A release label is required: ['breaking', 'bug', 'dependencies', 'documentation', 'enhancement', 'feature', 'infrastructure', 'maintenance', 'skip-changelog', 'testing', 'security fix']" + REQUIRED_LABELS_ANY: "breaking,feature,enhancement,bugFix,infrastructure,dependencies,documentation,maintenance,skip-changelog,testing,security fix" + REQUIRED_LABELS_ANY_DESCRIPTION: "A release label is required: ['breaking', 'bugFix', 'dependencies', 'documentation', 'enhancement', 'feature', 'infrastructure', 'maintenance', 'skip-changelog', 'testing', 'security fix']" From 5935ef8b04196478c0d94fd99af00bed64f63634 Mon Sep 17 00:00:00 2001 From: Tomoyuki MORITA Date: Tue, 11 Nov 2025 14:17:23 -0800 Subject: [PATCH 39/99] Update github workflows to move from macos-13 to 14 (#4779) Signed-off-by: Tomoyuki Morita --- .github/workflows/integ-tests-with-security.yml | 2 +- .github/workflows/sql-test-and-build-workflow.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integ-tests-with-security.yml b/.github/workflows/integ-tests-with-security.yml index 
8c2ee35408a..398734a80db 100644 --- a/.github/workflows/integ-tests-with-security.yml +++ b/.github/workflows/integ-tests-with-security.yml @@ -62,7 +62,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ windows-latest, macos-13 ] + os: [ windows-latest, macos-14 ] java: [21, 24] runs-on: ${{ matrix.os }} diff --git a/.github/workflows/sql-test-and-build-workflow.yml b/.github/workflows/sql-test-and-build-workflow.yml index a671420fd8f..151cf0f16d4 100644 --- a/.github/workflows/sql-test-and-build-workflow.yml +++ b/.github/workflows/sql-test-and-build-workflow.yml @@ -107,8 +107,8 @@ jobs: entry: - { os: windows-latest, java: 21, os_build_args: -PbuildPlatform=windows } - { os: windows-latest, java: 24, os_build_args: -PbuildPlatform=windows } - - { os: macos-13, java: 21, os_build_args: '' } - - { os: macos-13, java: 24, os_build_args: '' } + - { os: macos-14, java: 21, os_build_args: '' } + - { os: macos-14, java: 24, os_build_args: '' } test-type: ['unit', 'integration', 'doc'] exclude: # Exclude doctest for Windows From 88753718a249a5cd8d672f8eb6427d1e5e39a996 Mon Sep 17 00:00:00 2001 From: Kai Huang <105710027+ahkcs@users.noreply.github.com> Date: Tue, 11 Nov 2025 14:24:17 -0800 Subject: [PATCH 40/99] Fix binning udf resolution / Add type coercion support for binning UDFs (#4742) * Fix binning udf resolution Signed-off-by: Kai Huang * spotless fix Signed-off-by: Kai Huang * update tes Signed-off-by: Kai Huang * add comments Signed-off-by: Kai Huang * removal Signed-off-by: Kai Huang * update yaml test Signed-off-by: Kai Huang * rerun Signed-off-by: Kai Huang * update support for type coercion Signed-off-by: Kai Huang * update doc Signed-off-by: Kai Huang * enhance error handling and doc Signed-off-by: Kai Huang * fixes Signed-off-by: Kai Huang * fixes Signed-off-by: Kai Huang * fixes Signed-off-by: Kai Huang * Fix tests Signed-off-by: Kai Huang --------- Signed-off-by: Kai Huang --- .../calcite/utils/OpenSearchTypeFactory.java | 13 +- .../sql/calcite/utils/PPLOperandTypes.java | 59 ++++++++ .../calcite/utils/binning/BinnableField.java | 20 +-- .../binning/handlers/CountBinHandler.java | 17 ++- .../binning/handlers/DefaultBinHandler.java | 16 +- .../utils/binning/handlers/LogSpanHelper.java | 26 +++- .../binning/handlers/MinSpanBinHandler.java | 17 ++- .../binning/handlers/NumericSpanHelper.java | 6 +- .../binning/handlers/RangeBinHandler.java | 13 +- .../utils/binning/time/AlignmentHandler.java | 47 ++++-- .../utils/binning/time/DaySpanHandler.java | 5 +- .../utils/binning/time/MonthSpanHandler.java | 63 +++++--- .../binning/time/StandardTimeSpanHandler.java | 66 ++++++--- .../function/BuiltinFunctionName.java | 6 + .../expression/function/PPLFuncImpTable.java | 13 ++ .../udf/binning/WidthBucketFunction.java | 2 +- docs/user/ppl/cmd/bin.rst | 19 ++- .../calcite/remote/CalciteBinCommandIT.java | 140 +----------------- .../calcite/explain_bin_aligntime.yaml | 6 +- .../explain_bin_aligntime.yaml | 6 +- .../rest-api-spec/test/issues/4740.yml | 120 +++++++++++++++ .../sql/ppl/calcite/CalcitePPLBinTest.java | 108 +++++++++----- 22 files changed, 500 insertions(+), 288 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 914f5acdfc7..d225a797285 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ 
b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -338,12 +338,12 @@ public static boolean isUserDefinedType(RelDataType type) { } /** - * Checks if the RelDataType represents a numeric field. Supports both standard SQL numeric types - * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL) and OpenSearch UDT numeric - * types. + * Checks if the RelDataType represents a numeric type. Supports standard SQL numeric types + * (INTEGER, BIGINT, SMALLINT, TINYINT, FLOAT, DOUBLE, DECIMAL, REAL), OpenSearch UDT numeric + * types, and string types (VARCHAR, CHAR). * * @param fieldType the RelDataType to check - * @return true if the type is numeric, false otherwise + * @return true if the type is numeric or string, false otherwise */ public static boolean isNumericType(RelDataType fieldType) { // Check standard SQL numeric types @@ -359,6 +359,11 @@ public static boolean isNumericType(RelDataType fieldType) { return true; } + // Check string types (VARCHAR, CHAR) + if (sqlType == SqlTypeName.VARCHAR || sqlType == SqlTypeName.CHAR) { + return true; + } + // Check for OpenSearch UDT numeric types if (isUserDefinedType(fieldType)) { AbstractExprRelDataType exprType = (AbstractExprRelDataType) fieldType; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java index 8fb745428e5..abf37e68392 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java @@ -141,6 +141,65 @@ private PPLOperandTypes() {} SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC)); + + public static final UDFOperandMetadata WIDTH_BUCKET_OPERAND = + UDFOperandMetadata.wrap( + (CompositeOperandTypeChecker) + // 1. Numeric fields: bin age span=10 + OperandTypes.family( + SqlTypeFamily.NUMERIC, + SqlTypeFamily.INTEGER, + SqlTypeFamily.NUMERIC, + SqlTypeFamily.NUMERIC) + // 2. Timestamp fields with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) + .or( + OperandTypes.family( + SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, // TIMESTAMP - TIMESTAMP = INTERVAL (as STRING) + SqlTypeFamily.TIMESTAMP)) + // 3. 
Timestamp fields with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) + .or( + OperandTypes.family( + SqlTypeFamily.TIMESTAMP, + SqlTypeFamily.INTEGER, + SqlTypeFamily.TIMESTAMP, // TIMESTAMP - TIMESTAMP = TIMESTAMP + SqlTypeFamily.TIMESTAMP)) + // DATE field with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) + .or( + OperandTypes.family( + SqlTypeFamily.DATE, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, // DATE - DATE = INTERVAL (as STRING) + SqlTypeFamily.DATE)) + // DATE field with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) + .or( + OperandTypes.family( + SqlTypeFamily.DATE, + SqlTypeFamily.INTEGER, + SqlTypeFamily.DATE, // DATE - DATE = DATE + SqlTypeFamily.DATE)) + // TIME field with OpenSearch type system + // Used in: Production + Integration tests (CalciteBinCommandIT) + .or( + OperandTypes.family( + SqlTypeFamily.TIME, + SqlTypeFamily.INTEGER, + SqlTypeFamily.CHARACTER, // TIME - TIME = INTERVAL (as STRING) + SqlTypeFamily.TIME)) + // TIME field with Calcite SCOTT schema + // Used in: Unit tests (CalcitePPLBinTest) + .or( + OperandTypes.family( + SqlTypeFamily.TIME, + SqlTypeFamily.INTEGER, + SqlTypeFamily.TIME, // TIME - TIME = TIME + SqlTypeFamily.TIME))); + public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC_NUMERIC_NUMERIC = UDFOperandMetadata.wrap( OperandTypes.family( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java index c8c73ce3a99..a4e924b631c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/BinnableField.java @@ -11,13 +11,7 @@ import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.exception.SemanticCheckException; -/** - * Represents a validated field that supports binning operations. The existence of this class - * guarantees that validation has been run - the field is either numeric or time-based. - * - *
<p>
This design encodes validation in the type system, preventing downstream code from forgetting - * to validate or running validation multiple times. - */ +/** Represents a field that supports binning operations. */ @Getter public class BinnableField { private final RexNode fieldExpr; @@ -27,13 +21,12 @@ public class BinnableField { private final boolean isNumeric; /** - * Creates a validated BinnableField. Throws SemanticCheckException if the field is neither - * numeric nor time-based. + * Creates a BinnableField. Validates that the field type is compatible with binning operations. * * @param fieldExpr The Rex expression for the field * @param fieldType The relational data type of the field * @param fieldName The name of the field (for error messages) - * @throws SemanticCheckException if the field is neither numeric nor time-based + * @throws SemanticCheckException if the field type is not supported for binning */ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) { this.fieldExpr = fieldExpr; @@ -43,13 +36,10 @@ public BinnableField(RexNode fieldExpr, RelDataType fieldType, String fieldName) this.isTimeBased = OpenSearchTypeFactory.isTimeBasedType(fieldType); this.isNumeric = OpenSearchTypeFactory.isNumericType(fieldType); - // Validation: field must be either numeric or time-based + // Reject truly unsupported types (e.g., BOOLEAN, ARRAY, MAP) if (!isNumeric && !isTimeBased) { throw new SemanticCheckException( - String.format( - "Cannot apply binning: field '%s' is non-numeric and not time-related, expected" - + " numeric or time-related type", - fieldName)); + String.format("Cannot apply binning to field '%s': unsupported type", fieldName)); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java index 7422a26f0b7..bfc91a9a2de 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java @@ -6,7 +6,6 @@ package org.opensearch.sql.calcite.utils.binning.handlers; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.CountBin; @@ -16,7 +15,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for bins-based (count) binning operations. 
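Bucket sizing is delegated to the WIDTH_BUCKET UDF, which receives the requested bin count together with the data range and maximum computed via windowed MIN/MAX.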
*/ public class CountBinHandler implements BinHandler { @@ -40,7 +40,9 @@ public RexNode createExpression( // Calculate data range using window functions RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Convert start/end parameters RexNode startValue = convertParameter(countBin.getStart(), context); @@ -49,8 +51,13 @@ public RexNode createExpression( // WIDTH_BUCKET(field_value, num_bins, data_range, max_value) RexNode numBins = context.relBuilder.literal(requestedBins); - return context.rexBuilder.makeCall( - PPLBuiltinOperators.WIDTH_BUCKET, fieldExpr, numBins, dataRange, maxValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.WIDTH_BUCKET, + fieldExpr, + numBins, + dataRange, + maxValue); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java index e68477a9566..376e458049a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/DefaultBinHandler.java @@ -16,6 +16,8 @@ import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; import org.opensearch.sql.calcite.utils.binning.RangeFormatter; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for default binning when no parameters are specified. 
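With no parameters, a magnitude-based width is derived from LOG10 of the observed data range, each value is floored to a multiple of that width, and the result is rendered as a "start-end" range string.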
*/ public class DefaultBinHandler implements BinHandler { @@ -45,7 +47,9 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex // Calculate data range RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Calculate magnitude-based width RexNode log10Range = context.relBuilder.call(SqlStdOperatorTable.LOG10, dataRange); @@ -60,17 +64,21 @@ private RexNode createNumericDefaultBinning(RexNode fieldExpr, CalcitePlanContex // Calculate bin value RexNode binStartValue = calculateBinValue(fieldExpr, widthInt, context); RexNode binEndValue = - context.relBuilder.call(SqlStdOperatorTable.PLUS, binStartValue, widthInt); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, binStartValue, widthInt); return RangeFormatter.createRangeString(binStartValue, binEndValue, context); } private RexNode calculateBinValue(RexNode fieldExpr, RexNode width, CalcitePlanContext context) { - RexNode divided = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, fieldExpr, width); + RexNode divided = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, fieldExpr, width); RexNode floored = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); - return context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, floored, width); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, floored, width); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java index 9bad71c52f9..660530bb251 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/LogSpanHelper.java @@ -11,6 +11,8 @@ import org.opensearch.sql.calcite.utils.binning.BinConstants; import org.opensearch.sql.calcite.utils.binning.RangeFormatter; import org.opensearch.sql.calcite.utils.binning.SpanInfo; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Helper for creating logarithmic span expressions. 
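For a log span with coefficient c and base b, a value v falls into bucket n = FLOOR(log_b(v / c)), whose bounds are c * b^n and c * b^(n+1).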
*/ public class LogSpanHelper { @@ -31,14 +33,19 @@ public RexNode createLogSpanExpression( RexNode adjustedField = fieldExpr; if (coefficient != 1.0) { adjustedField = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, fieldExpr, context.relBuilder.literal(coefficient)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + fieldExpr, + context.relBuilder.literal(coefficient)); } // Calculate log_base(adjusted_field) RexNode lnField = context.relBuilder.call(SqlStdOperatorTable.LN, adjustedField); RexNode lnBase = context.relBuilder.literal(Math.log(base)); - RexNode logValue = context.relBuilder.call(SqlStdOperatorTable.DIVIDE, lnField, lnBase); + RexNode logValue = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, lnField, lnBase); // Get bin number RexNode binNumber = context.relBuilder.call(SqlStdOperatorTable.FLOOR, logValue); @@ -49,15 +56,20 @@ public RexNode createLogSpanExpression( RexNode basePowerBin = context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binNumber); RexNode lowerBound = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBin); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBin); RexNode binPlusOne = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, binNumber, context.relBuilder.literal(1.0)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + binNumber, + context.relBuilder.literal(1.0)); RexNode basePowerBinPlusOne = context.relBuilder.call(SqlStdOperatorTable.POWER, baseNode, binPlusOne); RexNode upperBound = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, coefficientNode, basePowerBinPlusOne); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, coefficientNode, basePowerBinPlusOne); // Create range string RexNode rangeStr = RangeFormatter.createRangeString(lowerBound, upperBound, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java index 16e11b7abce..a193d7e5c91 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/MinSpanBinHandler.java @@ -9,7 +9,6 @@ import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.MinSpanBin; @@ -18,7 +17,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for minspan-based binning operations. 
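Delegates to the MINSPAN_BUCKET UDF, passing the configured minimum span together with the data range and maximum computed via windowed MIN/MAX.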
*/ public class MinSpanBinHandler implements BinHandler { @@ -51,7 +51,9 @@ public RexNode createExpression( // Calculate data range using window functions RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex(); RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex(); - RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue); + RexNode dataRange = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, maxValue, minValue); // Convert start/end parameters RexNode startValue = convertParameter(minSpanBin.getStart(), context); @@ -60,8 +62,13 @@ public RexNode createExpression( // MINSPAN_BUCKET(field_value, min_span, data_range, max_value) RexNode minSpanParam = context.relBuilder.literal(minspan); - return context.rexBuilder.makeCall( - PPLBuiltinOperators.MINSPAN_BUCKET, fieldExpr, minSpanParam, dataRange, maxValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MINSPAN_BUCKET, + fieldExpr, + minSpanParam, + dataRange, + maxValue); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java index 76494dc0435..e14acfb729d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/NumericSpanHelper.java @@ -7,7 +7,8 @@ import org.apache.calcite.rex.RexNode; import org.opensearch.sql.calcite.CalcitePlanContext; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Helper for creating numeric span expressions. */ public class NumericSpanHelper { @@ -32,6 +33,7 @@ private RexNode createExpression( RexNode fieldExpr, RexNode spanValue, CalcitePlanContext context) { // SPAN_BUCKET(field_value, span_value) - return context.rexBuilder.makeCall(PPLBuiltinOperators.SPAN_BUCKET, fieldExpr, spanValue); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SPAN_BUCKET, fieldExpr, spanValue); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java index aa726cb9dbb..585e9234dc6 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/RangeBinHandler.java @@ -13,7 +13,8 @@ import org.opensearch.sql.calcite.utils.binning.BinFieldValidator; import org.opensearch.sql.calcite.utils.binning.BinHandler; import org.opensearch.sql.calcite.utils.binning.BinnableField; -import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for range-based binning (start/end parameters only). 
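Delegates to the RANGE_BUCKET UDF, combining the observed data MIN/MAX bounds with the user-supplied start and end parameters.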
*/ public class RangeBinHandler implements BinHandler { @@ -43,8 +44,14 @@ public RexNode createExpression( RexNode endParam = convertParameter(rangeBin.getEnd(), context, visitor); // Use RANGE_BUCKET with data bounds and user parameters - return context.rexBuilder.makeCall( - PPLBuiltinOperators.RANGE_BUCKET, fieldExpr, dataMin, dataMax, startParam, endParam); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.RANGE_BUCKET, + fieldExpr, + dataMin, + dataMax, + startParam, + endParam); } private RexNode convertParameter( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java index 7bdcf269593..7227bc732b3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/AlignmentHandler.java @@ -8,7 +8,9 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for time alignment operations (@d, @d+offset, epoch alignment). */ public class AlignmentHandler { @@ -30,18 +32,22 @@ public static RexNode createEpochAlignedSpan( // SPL Universal Formula: bin_start = reference + floor((timestamp - reference) / span) * span RexNode timeOffset = - context.relBuilder.call(SqlStdOperatorTable.MINUS, epochSeconds, referenceTimestamp); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, epochSeconds, referenceTimestamp); RexNode binNumber = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, timeOffset, intervalLiteral)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, timeOffset, intervalLiteral)); RexNode binOffset = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.PLUS, referenceTimestamp, binOffset); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, referenceTimestamp, binOffset); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } @@ -75,19 +81,24 @@ public static RexNode createTimeModifierAlignedSpan( RexNode daysSinceEpoch = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, earliestTimestamp, secondsPerDay)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + earliestTimestamp, + secondsPerDay)); RexNode startOfEarliestDay = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, daysSinceEpoch, secondsPerDay); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, daysSinceEpoch, secondsPerDay); // Calculate alignment reference point RexNode alignmentReference; if (offsetMillis != 0) { long offsetSeconds = offsetMillis / 1000L; alignmentReference = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + 
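// Illustrative only: an aligntime such as @d+4h yields offsetSeconds=14400, shifting the reference to 04:00 of the earliest day rather than midnight.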
BuiltinFunctionName.ADD, startOfEarliestDay, context.relBuilder.literal(offsetSeconds)); } else { @@ -96,27 +107,33 @@ public static RexNode createTimeModifierAlignedSpan( // Apply SPL Universal Formula RexNode timeOffset = - context.relBuilder.call(SqlStdOperatorTable.MINUS, epochSeconds, alignmentReference); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, epochSeconds, alignmentReference); RexNode binNumber = context.relBuilder.call( SqlStdOperatorTable.FLOOR, - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, timeOffset, intervalLiteral)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, timeOffset, intervalLiteral)); RexNode binOffset = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.PLUS, alignmentReference, binOffset); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.ADD, alignmentReference, binOffset); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } else { // No day alignment RexNode divided = - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, epochSeconds, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, epochSeconds, intervalLiteral); RexNode binNumber = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); RexNode binStartSeconds = - context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, binNumber, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, binNumber, intervalLiteral); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binStartSeconds); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java index d233d14c42c..d3f160eb15a 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/DaySpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for day-based time spans. 
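The bin start is computed as value - (value MOD interval), anchoring multi-day spans to a fixed cycle.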
*/ public class DaySpanHandler { @@ -38,6 +40,7 @@ private RexNode calculateBinStart(RexNode value, int interval, CalcitePlanContex RexNode intervalLiteral = context.relBuilder.literal(interval); RexNode positionInCycle = context.relBuilder.call(SqlStdOperatorTable.MOD, value, intervalLiteral); - return context.relBuilder.call(SqlStdOperatorTable.MINUS, value, positionInCycle); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, value, positionInCycle); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java index 7317ef565c9..a92dea0b5d5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/MonthSpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for month-based time spans using SPL Monthly Binning Algorithm. */ public class MonthSpanHandler { @@ -36,12 +38,17 @@ public RexNode createExpression( context.rexBuilder.makeCall( PPLBuiltinOperators.MAKEDATE, binStartYear, - context.rexBuilder.makeCall( - SqlStdOperatorTable.PLUS, - context.rexBuilder.makeCall( - SqlStdOperatorTable.MULTIPLY, - context.rexBuilder.makeCall( - SqlStdOperatorTable.MINUS, binStartMonth, context.relBuilder.literal(1)), + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + binStartMonth, + context.relBuilder.literal(1)), context.relBuilder.literal(31)), context.relBuilder.literal(1))); @@ -52,38 +59,52 @@ public RexNode createExpression( private RexNode calculateMonthsSinceEpoch( RexNode inputYear, RexNode inputMonth, CalcitePlanContext context) { RexNode yearsSinceEpoch = - context.relBuilder.call( - SqlStdOperatorTable.MINUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, inputYear, context.relBuilder.literal(BinConstants.UNIX_EPOCH_YEAR)); RexNode monthsFromYears = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, yearsSinceEpoch, context.relBuilder.literal(12)); - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + yearsSinceEpoch, + context.relBuilder.literal(12)); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, monthsFromYears, - context.relBuilder.call( - SqlStdOperatorTable.MINUS, inputMonth, context.relBuilder.literal(1))); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + inputMonth, + context.relBuilder.literal(1))); } private RexNode calculateBinStart(RexNode value, int interval, CalcitePlanContext context) { RexNode intervalLiteral = context.relBuilder.literal(interval); RexNode positionInCycle = context.relBuilder.call(SqlStdOperatorTable.MOD, value, intervalLiteral); - return 
context.relBuilder.call(SqlStdOperatorTable.MINUS, value, positionInCycle); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.SUBTRACT, value, positionInCycle); } private RexNode calculateBinStartYear(RexNode binStartMonths, CalcitePlanContext context) { - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, context.relBuilder.literal(BinConstants.UNIX_EPOCH_YEAR), - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, binStartMonths, context.relBuilder.literal(12))); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + binStartMonths, + context.relBuilder.literal(12))); } private RexNode calculateBinStartMonth(RexNode binStartMonths, CalcitePlanContext context) { - return context.relBuilder.call( - SqlStdOperatorTable.PLUS, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, context.relBuilder.call( SqlStdOperatorTable.MOD, binStartMonths, context.relBuilder.literal(12)), context.relBuilder.literal(1)); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java index dd0150a0b54..3eaaa7d2082 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/StandardTimeSpanHandler.java @@ -9,7 +9,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.utils.binning.BinConstants; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLBuiltinOperators; +import org.opensearch.sql.expression.function.PPLFuncImpTable; /** Handler for standard time units (microseconds through hours). 
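Epoch seconds are converted to the target unit, any alignment offset is subtracted, the value is floored to the interval, and the offset and unit conversion are then reversed before FROM_UNIXTIME.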
*/ public class StandardTimeSpanHandler { @@ -34,8 +36,11 @@ public RexNode createExpression( // Add back alignment offset if (alignmentOffset != 0) { binValue = - context.relBuilder.call( - SqlStdOperatorTable.PLUS, binValue, context.relBuilder.literal(alignmentOffset)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.ADD, + binValue, + context.relBuilder.literal(alignmentOffset)); } // Convert back to timestamp @@ -51,20 +56,25 @@ private RexNode convertToTargetUnit( // For sub-second units, work in milliseconds if (isSubSecondUnit(config)) { RexNode epochMillis = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, epochSeconds, context.relBuilder.literal(1000L)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, + epochSeconds, + context.relBuilder.literal(1000L)); if (config.getDivisionFactor() == 1) { return epochMillis; } else if (config.getDivisionFactor() > 1) { - return context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, epochMillis, context.relBuilder.literal(config.getDivisionFactor())); } else { // Microseconds - return context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, epochMillis, context.relBuilder.literal(BinConstants.MICROS_PER_MILLI)); } @@ -73,8 +83,9 @@ private RexNode convertToTargetUnit( if (config.getDivisionFactor() == 1) { return epochSeconds; } else { - return context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, epochSeconds, context.relBuilder.literal(config.getDivisionFactor())); } @@ -90,22 +101,27 @@ private RexNode convertFromTargetUnit( binMillis = binValue; } else if (config.getDivisionFactor() > 1) { binMillis = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, binValue, context.relBuilder.literal(config.getDivisionFactor())); } else { // Microseconds binMillis = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, binValue, context.relBuilder.literal(BinConstants.MICROS_PER_MILLI)); } RexNode binSeconds = - context.relBuilder.call( - SqlStdOperatorTable.DIVIDE, binMillis, context.relBuilder.literal(1000L)); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.DIVIDE, + binMillis, + context.relBuilder.literal(1000L)); return context.rexBuilder.makeCall(PPLBuiltinOperators.FROM_UNIXTIME, binSeconds); } else { @@ -114,8 +130,9 @@ private RexNode convertFromTargetUnit( binSeconds = binValue; } else { binSeconds = - context.relBuilder.call( - SqlStdOperatorTable.MULTIPLY, + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.MULTIPLY, binValue, context.relBuilder.literal(config.getDivisionFactor())); } @@ -129,17 +146,22 @@ private RexNode applyAlignmentOffset( if (alignmentOffset == 0) { return epochValue; } - return context.relBuilder.call( - SqlStdOperatorTable.MINUS, epochValue, context.relBuilder.literal(alignmentOffset)); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.SUBTRACT, + epochValue, + context.relBuilder.literal(alignmentOffset)); } private RexNode performBinning( RexNode adjustedValue, int 
intervalValue, CalcitePlanContext context) { RexNode intervalLiteral = context.relBuilder.literal(intervalValue); RexNode divided = - context.relBuilder.call(SqlStdOperatorTable.DIVIDE, adjustedValue, intervalLiteral); + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.DIVIDE, adjustedValue, intervalLiteral); RexNode floored = context.relBuilder.call(SqlStdOperatorTable.FLOOR, divided); - return context.relBuilder.call(SqlStdOperatorTable.MULTIPLY, floored, intervalLiteral); + return PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, BuiltinFunctionName.MULTIPLY, floored, intervalLiteral); } private long convertAlignmentOffset(long offsetMillis, TimeUnitConfig config) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index df3ae06d5c8..bba86b92ae3 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -59,6 +59,12 @@ public enum BuiltinFunctionName { TAN(FunctionName.of("tan")), SPAN(FunctionName.of("span")), + /** Binning Functions. */ + SPAN_BUCKET(FunctionName.of("span_bucket")), + WIDTH_BUCKET(FunctionName.of("width_bucket")), + MINSPAN_BUCKET(FunctionName.of("minspan_bucket")), + RANGE_BUCKET(FunctionName.of("range_bucket")), + /** Collection functions */ ARRAY(FunctionName.of("array")), ARRAY_LENGTH(FunctionName.of("array_length")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index f758427c6c3..406962e211e 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -136,6 +136,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINSPAN_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_DAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINUTE_OF_HOUR; @@ -166,6 +167,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RADIANS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RANGE_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH; @@ -189,6 +191,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SINH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN_BUCKET; import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.SQRT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_POP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_SAMP; @@ -231,6 +234,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKDAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKOFYEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WIDTH_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.XOR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; @@ -708,6 +712,11 @@ void populate() { SUBTRACT, SqlStdOperatorTable.MINUS, PPLTypeChecker.wrapFamily((FamilyOperandTypeChecker) OperandTypes.NUMERIC_NUMERIC)); + // Add DATETIME-DATETIME variant for timestamp binning support + registerOperator( + SUBTRACT, + SqlStdOperatorTable.MINUS, + PPLTypeChecker.family(SqlTypeFamily.DATETIME, SqlTypeFamily.DATETIME)); registerOperator(MULTIPLY, SqlStdOperatorTable.MULTIPLY); registerOperator(MULTIPLYFUNCTION, SqlStdOperatorTable.MULTIPLY); registerOperator(TRUNCATE, SqlStdOperatorTable.TRUNCATE); @@ -850,6 +859,10 @@ void populate() { registerOperator(EXPM1, PPLBuiltinOperators.EXPM1); registerOperator(RINT, PPLBuiltinOperators.RINT); registerOperator(SPAN, PPLBuiltinOperators.SPAN); + registerOperator(SPAN_BUCKET, PPLBuiltinOperators.SPAN_BUCKET); + registerOperator(WIDTH_BUCKET, PPLBuiltinOperators.WIDTH_BUCKET); + registerOperator(MINSPAN_BUCKET, PPLBuiltinOperators.MINSPAN_BUCKET); + registerOperator(RANGE_BUCKET, PPLBuiltinOperators.RANGE_BUCKET); registerOperator(E, PPLBuiltinOperators.E); registerOperator(CONV, PPLBuiltinOperators.CONV); registerOperator(MOD, PPLBuiltinOperators.MOD); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java index c1962266248..08daf9c314b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java @@ -66,7 +66,7 @@ public static boolean dateRelatedType(RelDataType type) { @Override public UDFOperandMetadata getOperandMetadata() { - return PPLOperandTypes.NUMERIC_NUMERIC_NUMERIC_NUMERIC; + return PPLOperandTypes.WIDTH_BUCKET_OPERAND; } public static class WidthBucketImplementor implements NotNullImplementor { diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst index 1ebdc3f897e..13fad6527fb 100644 --- a/docs/user/ppl/cmd/bin.rst +++ b/docs/user/ppl/cmd/bin.rst @@ -16,13 +16,13 @@ bin Description ============ -| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric field and generates a new field with values that represent the lower bound of each bucket. +| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. 
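For intuition, span-based binning assigns each value the lower bound floor(value / span) * span. A minimal standalone Java sketch of that arithmetic (class and method names invented for illustration; this is not plugin code):

    public final class SpanBucketSketch {
      // Inclusive lower bound of the bin containing `value` for a fixed span.
      static double lowerBound(double value, double span) {
        return Math.floor(value / span) * span;
      }

      public static void main(String[] args) {
        double lo = lowerBound(37, 10); // with span=10, 37 lands in the 30-40 bucket
        System.out.println(lo + "-" + (lo + 10)); // prints "30.0-40.0"
      }
    }

The SPAN_BUCKET and WIDTH_BUCKET UDFs wired up by this patch evaluate the same divide, floor, multiply shape over Rex nodes.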
Syntax ============ bin [span=] [minspan=] [bins=] [aligntime=(earliest | latest | )] [start=] [end=] -* field: mandatory. The numeric field to bin. +* field: mandatory. The field to bin. Accepts numeric or time-based fields. * span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters. * minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. * bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. @@ -530,3 +530,18 @@ PPL query:: | 28.0-29.0 | 13 | +-----------+----------------+ + +Example 20: Binning with string fields +============================================== + +PPL query:: + + os> source=accounts | eval age_str = CAST(age AS STRING) | bin age_str bins=3 | stats count() by age_str | sort age_str; + fetched rows / total rows = 2/2 + +---------+---------+ + | count() | age_str | + |---------+---------| + | 1 | 20-30 | + | 3 | 30-40 | + +---------+---------+ + diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index ef8abc3cba0..bb326dc39a7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.legacy.TestsConstants.*; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -507,145 +508,6 @@ public void testBinWithNonExistentField() { errorMessage.contains("non_existent_field") || errorMessage.contains("not found")); } - @Test - public void testBinWithMinspanOnNonNumericField() { - // Test that bin command with minspan throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format( - "source=%s | bin firstname minspan=10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'firstname' is non-numeric and not time-related, expected" - + " numeric or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithSpanOnNonNumericField() { - // Test that bin command with span throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin lastname span=5 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'lastname' is non-numeric and not time-related, expected" - + " numeric or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithBinsOnNonNumericField() { - // Test that bin command with bins throws clear error for 
non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin state bins=10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'state' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinWithStartEndOnNonNumericField() { - // Test that bin command with start/end throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format( - "source=%s | bin city start=0 end=100 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'city' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinDefaultOnNonNumericField() { - // Test that default bin (no parameters) throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery(String.format("source=%s | bin email | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'email' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - - @Test - public void testBinLogSpanOnNonNumericField() { - // Test that bin command with log span throws clear error for non-numeric field - ResponseException exception = - assertThrows( - ResponseException.class, - () -> { - executeQuery( - String.format("source=%s | bin gender span=log10 | head 1", TEST_INDEX_ACCOUNT)); - }); - - // Get the full error message - String errorMessage = exception.getMessage(); - - // Verify the error message is clear and specific - String expectedMessage = - "Cannot apply binning: field 'gender' is non-numeric and not time-related, expected numeric" - + " or time-related type"; - assertTrue( - "Error message should contain: '" + expectedMessage + "'", - errorMessage.contains(expectedMessage)); - } - @Test public void testBinSpanWithStartEndNeverShrinkRange() throws IOException { JSONObject result = diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml index 53cd91e02ca..9a82afe29ec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_bin_aligntime.yaml @@ -3,8 +3,8 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(category=[$0], value=[$1], timestamp=[$2], @timestamp=[$9]) LogicalSort(fetch=[5]) - LogicalProject(category=[$1], value=[$2], 
timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(/(/(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) + LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[UNIX_TIMESTAMP($t3)], expr#5=[3600], expr#6=[/($t4, $t5)], expr#7=[2], expr#8=[/($t6, $t7)], expr#9=[FLOOR($t8)], expr#10=[*($t9, $t7)], expr#11=[*($t10, $t5)], expr#12=[FROM_UNIXTIME($t11)], proj#0..2=[{exprs}], $f3=[$t12]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[UNIX_TIMESTAMP($t3)], expr#5=[3600], expr#6=[DIVIDE($t4, $t5)], expr#7=[2], expr#8=[DIVIDE($t6, $t7)], expr#9=[FLOOR($t8)], expr#10=[*($t9, $t7)], expr#11=[*($t10, $t5)], expr#12=[FROM_UNIXTIME($t11)], proj#0..2=[{exprs}], $f3=[$t12]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[category, value, timestamp, @timestamp], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["category","value","timestamp","@timestamp"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml index 282a080733e..e8a727cf1e3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_bin_aligntime.yaml @@ -3,10 +3,10 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(category=[$0], value=[$1], timestamp=[$2], @timestamp=[$9]) LogicalSort(fetch=[5]) - LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(/(/(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) + LogicalProject(category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], @timestamp=[FROM_UNIXTIME(*(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($0), 3600), 2)), 2), 3600))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..9=[{inputs}], expr#10=[UNIX_TIMESTAMP($t0)], expr#11=[3600], expr#12=[/($t10, $t11)], expr#13=[2], expr#14=[/($t12, $t13)], expr#15=[FLOOR($t14)], expr#16=[*($t15, $t13)], expr#17=[*($t16, $t11)], expr#18=[FROM_UNIXTIME($t17)], category=[$t1], value=[$t2], timestamp=[$t3], @timestamp=[$t18]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[UNIX_TIMESTAMP($t0)], expr#11=[3600], expr#12=[DIVIDE($t10, $t11)], expr#13=[2], expr#14=[DIVIDE($t12, $t13)], 
expr#15=[FLOOR($t14)], expr#16=[*($t15, $t13)], expr#17=[*($t16, $t11)], expr#18=[FROM_UNIXTIME($t17)], category=[$t1], value=[$t2], timestamp=[$t3], @timestamp=[$t18]) EnumerableLimit(fetch=[5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) \ No newline at end of file diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml new file mode 100644 index 00000000000..5fdb4198abe --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4740.yml @@ -0,0 +1,120 @@ +setup: + - do: + indices.create: + index: test_binning_4740 + body: + mappings: + properties: + "@timestamp": + type: date + "age": + type: keyword + "balance": + type: keyword + "name": + type: keyword + - do: + bulk: + index: test_binning_4740 + refresh: true + body: + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:00:00.000Z","age":"25","balance":"1000.0","name":"Alice"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:05:00.000Z","age":"30","balance":"2000.0","name":"Bob"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:10:00.000Z","age":"35","balance":"3000.0","name":"Charlie"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:15:00.000Z","age":"40","balance":"4000.0","name":"David"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-01T00:20:00.000Z","age":"45","balance":"5000.0","name":"Eve"}' + +--- +"bin with numeric field using WIDTH_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age bins=3 | stats count() by age | sort age + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } + +--- +"bin with numeric span using SPAN_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age span=10 | stats count() by age | sort age + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } + +--- +"bin with minspan using MINSPAN_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin balance minspan=1000 | stats count() by balance | sort balance + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "balance", "type": "string" } ] } + - match: { "datarows": [ [ 1, "1000-2000" ], [ 1, "2000-3000" ], [ 1, "3000-4000" ], [ 1, "4000-5000" ], [ 1, "5000-6000" ] ] } + +--- +"bin with start and end using RANGE_BUCKET - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin age start=20 end=50 | stats count() by age | sort age + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "age", "type": "string" } ] } + - match: { "datarows": [ [ 1, "20-30" ], [ 2, "30-40" ], [ 2, "40-50" ] ] } + +--- +"bin with default binning (no parameters) on string field - issue 4740": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_binning_4740 | bin balance | stats count() by balance | sort balance + + - match: { "schema": [ { "name": "count()", "type": "bigint" }, { "name": "balance", "type": "string" } ] } + - match: { "datarows": [ [ 1, "1000.0-2000.0" ], [ 1, "2000.0-3000.0" ], [ 1, "3000.0-4000.0" ], [ 1, "4000.0-5000.0" ], [ 1, "5000.0-6000.0" ] ] } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index ddb22092c99..0d933fe1649 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -5,6 +5,9 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; import org.junit.Test; @@ -39,8 +42,7 @@ public void testBinWithBins() { String ppl = "source=EMP | bin SAL bins=10"; RelNode root = getRelNode(ppl); - // Note: WIDTH_BUCKET uses window functions without ROWS UNBOUNDED PRECEDING in the actual - // output + // Note: WIDTH_BUCKET uses window functions and now properly resolves via PPLFuncImpTable verifyLogical( root, "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], " @@ -48,6 +50,15 @@ public void testBinWithBins() { + "-(MAX($5) OVER (), MIN($5) OVER ()), " + "MAX($5) OVER ())])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `WIDTH_BUCKET`(`SAL`," + + " 10, 
(MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) -" + + " (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))," + + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))" + + " `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -64,6 +75,15 @@ public void testBinWithMinspan() { + "-(MAX($5) OVER (), MIN($5) OVER ()), " + "MAX($5) OVER ())])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`," + + " `MINSPAN_BUCKET`(`SAL`, 1.000E2, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " PRECEDING AND UNBOUNDED FOLLOWING)) `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -79,6 +99,37 @@ public void testBinWithStartEnd() { + "MIN($5) OVER (), MAX($5) OVER (), " + "1000, 5000)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `RANGE_BUCKET`(`SAL`," + + " MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 1000," + + " 5000) `SAL`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testBinWithTimestampFieldUsingBins() { + String ppl = "source=products_temporal | bin SYS_START bins=10"; + RelNode root = getRelNode(ppl); + + // WIDTH_BUCKET with timestamp field + // The third parameter (data_range) is a STRING interval, not numeric + verifyLogical( + root, + "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3], SYS_START=[WIDTH_BUCKET($2, 10, " + + "-(MAX($2) OVER (), MIN($2) OVER ()), " + + "MAX($2) OVER ())])\n" + + " LogicalTableScan(table=[[scott, products_temporal]])\n"); + + String expectedSparkSql = + "SELECT `ID`, `SUPPLIER`, `SYS_END`, `WIDTH_BUCKET`(`SYS_START`, 10, (MAX(`SYS_START`) OVER" + + " (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SYS_START`)" + + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SYS_START`)" + + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) `SYS_START`\n" + + "FROM `scott`.`products_temporal`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @Test @@ -90,13 +141,15 @@ public void testBinWithTimeSpan() { verifyLogical( root, "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3]," - + " SYS_START=[FROM_UNIXTIME(*(FLOOR(/(/(UNIX_TIMESTAMP($2), 3600), 1)), 3600))])\n" + + " SYS_START=[FROM_UNIXTIME(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($2), 3600), 1))," + + " 3600))])\n" + " LogicalTableScan(table=[[scott, products_temporal]])\n"); verifyPPLToSparkSQL( root, - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `FROM_UNIXTIME`(FLOOR(`UNIX_TIMESTAMP`(`SYS_START`) /" - + " 3600 / 1) * 3600) `SYS_START`\n" + "SELECT `ID`, `SUPPLIER`, `SYS_END`," + + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } @@ -110,43 +163,26 @@ public void testBinWithAligntime() { verifyLogical( root, "LogicalProject(ID=[$0], SUPPLIER=[$1], SYS_END=[$3]," - + " SYS_START=[FROM_UNIXTIME(*(FLOOR(/(/(UNIX_TIMESTAMP($2), 3600), 1)), 3600))])\n" + + " 
SYS_START=[FROM_UNIXTIME(*(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP($2), 3600), 1))," + + " 3600))])\n" + " LogicalTableScan(table=[[scott, products_temporal]])\n"); verifyPPLToSparkSQL( root, - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `FROM_UNIXTIME`(FLOOR(`UNIX_TIMESTAMP`(`SYS_START`) /" - + " 3600 / 1) * 3600) `SYS_START`\n" + "SELECT `ID`, `SUPPLIER`, `SYS_END`," + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } - @Test(expected = SemanticCheckException.class) - public void testBinWithMinspanOnNonNumericField() { - String ppl = "source=EMP | bin ENAME minspan=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithSpanOnNonNumericField() { - String ppl = "source=EMP | bin JOB span=5"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithBinsOnNonNumericField() { - String ppl = "source=EMP | bin ENAME bins=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinWithStartEndOnNonNumericField() { - String ppl = "source=EMP | bin JOB start=1 end=10"; - getRelNode(ppl); // Should throw SemanticCheckException - } - - @Test(expected = SemanticCheckException.class) - public void testBinDefaultOnNonNumericField() { - String ppl = "source=EMP | bin ENAME"; - getRelNode(ppl); // Should throw SemanticCheckException + @Test + public void testBinOnNonBinnableType() { + // Test that binning on truly unsupported types (not numeric, time, or string) fails + String ppl = "source=products_temporal | eval bool_field = true | bin bool_field bins=3"; + + SemanticCheckException exception = + assertThrows(SemanticCheckException.class, () -> getRelNode(ppl)); + assertTrue(exception.getMessage().contains("Cannot apply binning")); + assertTrue(exception.getMessage().contains("unsupported type")); } } From efec652706cae0185be6d2e5af7bbc14234e2f51 Mon Sep 17 00:00:00 2001 From: Xinyuan Lu Date: Wed, 12 Nov 2025 14:12:49 +0800 Subject: [PATCH 41/99] Support `appendpipe` command in PPL (#4602) * add demos Signed-off-by: xinyual * add missing column Signed-off-by: xinyual * add appendpipe poc Signed-off-by: xinyual * slightly change syntax Signed-off-by: xinyual * add unresolved plan Signed-off-by: xinyual * add IT Signed-off-by: xinyual * add tests Signed-off-by: xinyual * remove useless ut Signed-off-by: xinyual * fix conflict Signed-off-by: xinyual * remove useless code Signed-off-by: xinyual * remove useless code Signed-off-by: xinyual * remove useless code Signed-off-by: xinyual * apply spotless Signed-off-by: xinyual * remove useless change Signed-off-by: xinyual * add explain IT Signed-off-by: xinyual * fix IT Signed-off-by: xinyual * apply spotless Signed-off-by: xinyual * add doc Signed-off-by: xinyual * optimize doc Signed-off-by: xinyual * add UT Signed-off-by: xinyual * fix IT due to performance change Signed-off-by: xinyual * add multiple-children check Signed-off-by: xinyual --------- Signed-off-by: xinyual --- .../org/opensearch/sql/analysis/Analyzer.java | 6 ++ .../sql/ast/AbstractNodeVisitor.java | 5 ++ .../org/opensearch/sql/ast/dsl/AstDSL.java | 6 ++ .../opensearch/sql/ast/tree/AppendPipe.java | 45 ++++++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 29 +++++- docs/user/ppl/cmd/appendpipe.rst | 72 +++++++++++++++
.../sql/calcite/remote/CalciteExplainIT.java | 12 +++ .../remote/CalcitePPLAppendPipeCommandIT.java | 90 +++++++++++++++++++ .../calcite/explain_appendpipe_command.json | 6 ++ .../explain_appendpipe_command.json | 6 ++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 10 +++ .../opensearch/sql/ppl/parser/AstBuilder.java | 17 ++++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 14 +++ .../ppl/calcite/CalcitePPLAppendPipeTest.java | 62 +++++++++++++ .../sql/ppl/parser/AstBuilderTest.java | 15 ++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 13 +++ 17 files changed, 408 insertions(+), 1 deletion(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/AppendPipe.java create mode 100644 docs/user/ppl/cmd/appendpipe.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_appendpipe_command.json create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index f7017565595..f20e95c0382 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -60,6 +60,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; +import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; @@ -833,6 +834,11 @@ public LogicalPlan visitAppendCol(AppendCol node, AnalysisContext context) { throw getOnlyForCalciteException("Appendcol"); } + @Override + public LogicalPlan visitAppendPipe(AppendPipe node, AnalysisContext context) { + throw getOnlyForCalciteException("AppendPipe"); + } + @Override public LogicalPlan visitAppend(Append node, AnalysisContext context) { throw getOnlyForCalciteException("Append"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 320723fd57c..2daa4b4a2f9 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -48,6 +48,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; +import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; @@ -140,6 +141,10 @@ public T visitSearch(Search node, C context) { return visitChildren(node, context); } + public T visitAppendPipe(AppendPipe node, C context) { + return visitChildren(node, context); + } + public T visitFilter(Filter node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 67cc893c5b0..4e87502e73c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -49,6 +49,7 @@ 
import org.opensearch.sql.ast.expression.WindowFunction; import org.opensearch.sql.ast.expression.Xor; import org.opensearch.sql.ast.tree.Aggregation; +import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; @@ -563,6 +564,11 @@ public static Trendline trendline( return new Trendline(sortField, Arrays.asList(computations)).attach(input); } + public static AppendPipe appendPipe(UnresolvedPlan input, UnresolvedPlan subquery) { + + return new AppendPipe(subquery).attach(input); + } + public static Trendline.TrendlineComputation computation( Integer numDataPoints, Field dataField, String alias, Trendline.TrendlineType type) { return new Trendline.TrendlineComputation(numDataPoints, dataField, alias, type); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/AppendPipe.java b/core/src/main/java/org/opensearch/sql/ast/tree/AppendPipe.java new file mode 100644 index 00000000000..0ea1cb9b453 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/AppendPipe.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +public class AppendPipe extends UnresolvedPlan { + + private UnresolvedPlan subQuery; + + private UnresolvedPlan child; + + public AppendPipe(UnresolvedPlan subQuery) { + this.subQuery = subQuery; + } + + @Override + public AppendPipe attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List<UnresolvedPlan> getChild() { + return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) { + return nodeVisitor.visitAppendPipe(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 94086952610..415d47a0fd7 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -105,6 +105,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; +import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; @@ -246,6 +247,28 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) { return context.relBuilder.peek(); } + @Override + public RelNode visitAppendPipe(AppendPipe node, CalcitePlanContext context) { + visitChildren(node, context); + UnresolvedPlan subqueryPlan = node.getSubQuery(); + UnresolvedPlan childNode = subqueryPlan; + while (childNode.getChild() != null + && !childNode.getChild().isEmpty() + && !(childNode.getChild().getFirst() instanceof Values)) { + if (childNode.getChild().size() > 1) { + throw new RuntimeException("AppendPipe doesn't support subqueries with multiple children."); + } + childNode = (UnresolvedPlan) childNode.getChild().getFirst(); + } + childNode.attach(node.getChild().getFirst()); + + subqueryPlan.accept(this, context); + + RelNode subPipelineNode = context.relBuilder.build(); + RelNode mainNode = context.relBuilder.build(); + return mergeTableAndResolveColumnConflict(mainNode, subPipelineNode, context); + } + @Override public RelNode visitRegex(Regex node, CalcitePlanContext context) { visitChildren(node, context); @@ -2121,9 +2144,13 @@ public RelNode visitAppend(Append node, CalcitePlanContext context) { // 3. Merge two query schemas using shared logic RelNode subsearchNode = context.relBuilder.build(); RelNode mainNode = context.relBuilder.build(); + return mergeTableAndResolveColumnConflict(mainNode, subsearchNode, context); + } + private RelNode mergeTableAndResolveColumnConflict( + RelNode mainNode, RelNode subqueryNode, CalcitePlanContext context) { // Use shared schema merging logic that handles type conflicts via field renaming - List<RelNode> nodesToMerge = Arrays.asList(mainNode, subsearchNode); + List<RelNode> nodesToMerge = Arrays.asList(mainNode, subqueryNode); List<RelNode> projectedNodes = SchemaUnifier.buildUnifiedSchemaWithConflictResolution(nodesToMerge, context); diff --git a/docs/user/ppl/cmd/appendpipe.rst b/docs/user/ppl/cmd/appendpipe.rst new file mode 100644 index 00000000000..43c4dd1e84d --- /dev/null +++ b/docs/user/ppl/cmd/appendpipe.rst @@ -0,0 +1,72 @@ +========== +appendpipe +========== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| The ``appendpipe`` command appends the result of the subpipeline to the search results. Unlike a subsearch, the subpipeline is not run first; it runs when the search reaches the ``appendpipe`` command. +The command aligns columns with the same field names and types. For columns that exist in only one of the main search and the sub-search, NULL values are filled in the respective rows. + +Version +======= +3.3.0 + +Syntax +============ +appendpipe [<subpipeline>] + +* subpipeline: mandatory. 
A list of commands applied to the search results produced by the commands that precede ``appendpipe`` in the search. + +Example 1: Append rows from a total count to the existing search result +==================================================================================== + +This example appends rows from "total by gender" to "sum by gender, state"; columns with the same field name and type are merged. + +PPL query:: + + os> source=accounts | stats sum(age) as part by gender, state | sort -part | head 5 | appendpipe [ stats sum(part) as total by gender ]; + fetched rows / total rows = 6/6 + +------+--------+-------+-------+ + | part | gender | state | total | + |------+--------+-------+-------| + | 36 | M | TN | null | + | 33 | M | MD | null | + | 32 | M | IL | null | + | 28 | F | VA | null | + | null | F | null | 28 | + | null | M | null | 101 | + +------+--------+-------+-------+ + + + +Example 2: Append rows with merged column names +=============================================================== + +This example appends rows from "total by gender" to "sum by gender, state"; because the sub-pipeline reuses the column name ``total``, the appended rows merge into the existing column. + +PPL query:: + + os> source=accounts | stats sum(age) as total by gender, state | sort -total | head 5 | appendpipe [ stats sum(total) as total by gender ]; + fetched rows / total rows = 6/6 + +----------+--------+-------+ + | total | gender | state | + |----------+--------+-------| + | 36 | M | TN | + | 33 | M | MD | + | 32 | M | IL | + | 28 | F | VA | + | 28 | F | null | + | 101 | M | null | + +----------+--------+-------+ + +Limitations +=========== + +* **Schema Compatibility**: As with the ``append`` command, when fields with the same name exist between the main search and sub-search but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type, or use different field names (e.g., by renaming with ``eval`` or using ``fields`` to select non-conflicting columns); a sketch of this workaround follows. 
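As a concrete sketch of that renaming workaround, reusing the ``accounts`` index from the examples above (illustrative only; output omitted): rather than overwriting ``sum`` with a ``double`` cast, which would collide with the main search's ``bigint`` column of the same name, write the cast into a new field and project only non-conflicting columns in the sub-pipeline::

    source=accounts | stats sum(age) as sum by gender | appendpipe [ eval sum_double = cast(sum as double) | fields gender, sum_double ]

Because the sub-pipeline no longer emits a conflicting ``sum`` column, the union succeeds: ``sum_double`` is NULL-filled for the main-search rows, and ``sum`` is NULL-filled for the appended rows.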
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 994e583eaa8..841469115d5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -894,6 +894,18 @@ public void testExplainAppendCommand() throws IOException { TEST_INDEX_BANK))); } + @Test + public void testExplainAppendPipeCommand() throws IOException { + String expected = loadExpectedPlan("explain_appendpipe_command.json"); + assertJsonEqualsIgnoreId( + expected, + explainQueryToString( + String.format( + Locale.ROOT, + "source=%s | appendpipe [ stats count(balance) as cnt by gender ]", + TEST_INDEX_BANK))); + } + @Test public void testMvjoinExplain() throws IOException { String query = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java new file mode 100644 index 00000000000..d25d3ca80db --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendPipeCommandIT.java @@ -0,0 +1,90 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; + +import java.io.IOException; +import java.util.Locale; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalcitePPLAppendPipeCommandIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.ACCOUNT); + loadIndex(Index.BANK); + } + + @Test + public void testAppendPipe() throws IOException { + JSONObject actual = + executeQuery( + String.format( + Locale.ROOT, + "source=%s | stats sum(age) as sum_age_by_gender by gender | appendpipe [ " + + " sort -sum_age_by_gender ] |" + + " head 5", + TEST_INDEX_ACCOUNT)); + verifySchemaInOrder(actual, schema("sum_age_by_gender", "bigint"), schema("gender", "string")); + verifyDataRows(actual, rows(14947, "F"), rows(15224, "M"), rows(15224, "M"), rows(14947, "F")); + } + + @Test + public void testAppendDifferentIndex() throws IOException { + JSONObject actual = + executeQuery( + String.format( + Locale.ROOT, + "source=%s | stats sum(age) as sum by gender | append [ source=%s | stats" + + " sum(age) as bank_sum_age ]", + TEST_INDEX_ACCOUNT, + TEST_INDEX_BANK)); + verifySchemaInOrder( + actual, + schema("sum", "bigint"), + schema("gender", "string"), + schema("bank_sum_age", "bigint")); + verifyDataRows(actual, rows(14947, "F", null), rows(15224, "M", null), rows(null, null, 238)); + } + + @Test + public void testAppendpipeWithMergedColumn() throws IOException { + JSONObject actual = + executeQuery( + String.format( + Locale.ROOT, + "source=%s | stats sum(age) as sum by gender |" + + " appendpipe [ stats sum(sum) as sum ] | head 5", + TEST_INDEX_ACCOUNT, + TEST_INDEX_ACCOUNT)); + verifySchemaInOrder(actual, 
schema("sum", "bigint"), schema("gender", "string")); + verifyDataRows(actual, rows(14947, "F"), rows(15224, "M"), rows(30171, null)); + } + + @Test + public void testAppendpipeWithConflictTypeColumn() throws IOException { + Exception exception = + assertThrows( + Exception.class, + () -> + executeQuery( + String.format( + Locale.ROOT, + "source=%s | stats sum(age) as sum by gender | appendpipe [ eval sum =" + + " cast(sum as double) ] | head 5", + TEST_INDEX_ACCOUNT))); + assertTrue(exception.getMessage().contains("due to incompatible types")); + } +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json new file mode 100644 index 00000000000..6ec42972a10 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], cnt=[$19])\n LogicalUnion(all=[true])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], cnt=[null:BIGINT])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], birthdate=[null:EXPR_TIMESTAMP VARCHAR], gender=[$0], city=[null:VARCHAR], lastname=[null:VARCHAR], balance=[null:BIGINT], employer=[null:VARCHAR], state=[null:VARCHAR], age=[null:INTEGER], email=[null:VARCHAR], male=[null:BOOLEAN], _id=[null:VARCHAR], _index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR], cnt=[$1])\n LogicalAggregate(group=[{0}], cnt=[COUNT($1)])\n LogicalProject(gender=[$4], balance=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", + "physical":"EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], proj#0..13=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=[null:EXPR_TIMESTAMP VARCHAR], expr#5=[null:INTEGER], expr#6=[null:BOOLEAN], account_number=[$t2], firstname=[$t3], address=[$t3], birthdate=[$t4], gender=[$t0], city=[$t3], lastname=[$t3], balance=[$t2], employer=[$t3], state=[$t3], age=[$t5], email=[$t3], male=[$t6], cnt=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_appendpipe_command.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_appendpipe_command.json new file mode 100644 index 00000000000..2b111e119db --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_appendpipe_command.json @@ -0,0 +1,6 @@ +{ + "calcite": { + "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], cnt=[$19])\n LogicalUnion(all=[true])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], cnt=[null:BIGINT])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], birthdate=[null:EXPR_TIMESTAMP VARCHAR], gender=[$0], city=[null:VARCHAR], lastname=[null:VARCHAR], balance=[null:BIGINT], employer=[null:VARCHAR], state=[null:VARCHAR], age=[null:INTEGER], email=[null:VARCHAR], male=[null:BOOLEAN], _id=[null:VARCHAR], _index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR], cnt=[$1])\n LogicalAggregate(group=[{0}], cnt=[COUNT($1)])\n LogicalProject(gender=[$4], balance=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", + "physical":"EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[null:BIGINT], proj#0..12=[{exprs}], cnt=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=[null:EXPR_TIMESTAMP VARCHAR], expr#5=[null:INTEGER], expr#6=[null:BOOLEAN], account_number=[$t2], firstname=[$t3], address=[$t3], birthdate=[$t4], gender=[$t0], city=[$t3], lastname=[$t3], balance=[$t2], employer=[$t3], state=[$t3], age=[$t5], email=[$t3], male=[$t6], cnt=[$t1])\n EnumerableAggregate(group=[{4}], cnt=[COUNT($7)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" + } +} \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index d7272a47a0f..bbd0ba94b41 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -62,6 +62,7 @@ BUFFER_LIMIT: 'BUFFER_LIMIT'; LABEL: 'LABEL'; SHOW_NUMBERED_TOKEN: 'SHOW_NUMBERED_TOKEN'; AGGREGATION: 'AGGREGATION'; +APPENDPIPE: 'APPENDPIPE'; //Native JOIN KEYWORDS 
JOIN: 'JOIN'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index c67ca41b5f1..dd9442995ad 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -19,6 +19,10 @@ pplStatement | queryStatement ; +subPipeline + : PIPE? commands (PIPE commands)* + ; + queryStatement : (PIPE)? pplCommands (PIPE commands)* ; @@ -80,6 +84,7 @@ commands | chartCommand | timechartCommand | rexCommand + | appendPipeCommand | replaceCommand ; @@ -120,6 +125,7 @@ commandName | APPEND | MULTISEARCH | REX + | APPENDPIPE | REPLACE ; @@ -220,6 +226,10 @@ statsCommand : STATS statsArgs statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (dedupSplitArg)? ; +appendPipeCommand + : APPENDPIPE LT_SQR_PRTHS subPipeline RT_SQR_PRTHS + ; + statsArgs : (partitionsArg | allnumArg | delimArg | bucketNullableArg)* ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 09e9b4c77ed..4566dc30a27 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -73,6 +73,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; +import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; @@ -165,6 +166,16 @@ public UnresolvedPlan visitQueryStatement(OpenSearchPPLParser.QueryStatementCont .reduce(pplCommand, (r, e) -> e.attach(e instanceof Join ? projectExceptMeta(r) : r)); } + @Override + public UnresolvedPlan visitSubPipeline(OpenSearchPPLParser.SubPipelineContext ctx) { + List<OpenSearchPPLParser.CommandsContext> cmds = ctx.commands(); + if (cmds.isEmpty()) { + throw new IllegalArgumentException("appendpipe [] is empty"); + } + UnresolvedPlan seed = visit(cmds.getFirst()); + return cmds.stream().skip(1).map(this::visit).reduce(seed, (left, op) -> op.attach(left)); + } + @Override public UnresolvedPlan visitSubSearch(OpenSearchPPLParser.SubSearchContext ctx) { UnresolvedPlan searchCommand = visit(ctx.searchCommand()); @@ -236,6 +247,12 @@ public UnresolvedPlan visitWhereCommand(WhereCommandContext ctx) { return new Filter(internalVisitExpression(ctx.logicalExpression())); } + @Override + public UnresolvedPlan visitAppendPipeCommand(OpenSearchPPLParser.AppendPipeCommandContext ctx) { + UnresolvedPlan plan = visit(ctx.subPipeline()); + return new AppendPipe(plan); + } + @Override public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ctx) { // a sql-like syntax if join criteria existed diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 0971924295a..b3b91d11b5d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -55,6 +55,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Append; import org.opensearch.sql.ast.tree.AppendCol; +import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CountBin; @@ -711,6 +712,19 @@ private String visitExpression(UnresolvedExpression expression) { return 
expressionAnalyzer.analyze(expression, null); } + @Override + public String visitAppendPipe(AppendPipe node, String context) { + Values emptyValue = new Values(null); + UnresolvedPlan childNode = node.getSubQuery(); + while (childNode != null && !childNode.getChild().isEmpty()) { + childNode = (UnresolvedPlan) childNode.getChild().get(0); + } + childNode.attach(emptyValue); + String child = node.getChild().get(0).accept(this, context); + String subPipeline = anonymizeData(node.getSubQuery()); + return StringUtils.format("%s | appendpipe [%s]", child, subPipeline); + } + @Override public String visitFillNull(FillNull node, String context) { String child = node.getChild().get(0).accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java new file mode 100644 index 00000000000..faf944da4a0 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAppendPipeTest.java @@ -0,0 +1,62 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLAppendPipeTest extends CalcitePPLAbstractTest { + public CalcitePPLAppendPipeTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testAppendPipe() { + String ppl = "source=EMP | appendpipe [ where DEPTNO = 20 ]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalFilter(condition=[=($7, 20)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 19); // 14 original table rows + 5 filtered subquery rows + + String expectedSparkSql = + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "UNION ALL\n" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 20"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testAppendPipeWithMergedColumns() { + String ppl = + "source=EMP | fields DEPTNO | appendpipe [ fields DEPTNO | eval DEPTNO_PLUS =" + + " DEPTNO + 10 ]"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalUnion(all=[true])\n" + + " LogicalProject(DEPTNO=[$7], DEPTNO_PLUS=[null:INTEGER])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(DEPTNO=[$7], DEPTNO_PLUS=[+($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 28); + + String expectedSparkSql = + "SELECT `DEPTNO`, CAST(NULL AS INTEGER) `DEPTNO_PLUS`\n" + + "FROM `scott`.`EMP`\n" + + "UNION ALL\n" + + "SELECT `DEPTNO`, `DEPTNO` + 10 `DEPTNO_PLUS`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index f1464e31065..8cc207b6561 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -13,6 +13,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.agg; import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; import static org.opensearch.sql.ast.dsl.AstDSL.alias; +import static 
org.opensearch.sql.ast.dsl.AstDSL.appendPipe; import static org.opensearch.sql.ast.dsl.AstDSL.argument; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.compare; @@ -1003,6 +1004,20 @@ public void testFillNullValueWithFields() { fillNull(relation("t"), intLiteral(0), true, field("a"), field("b"), field("c"))); } + @Test + public void testAppendPipe() { + assertEqual( + "source=t | appendpipe [ stats COUNT() ]", + appendPipe( + relation("t"), + agg( + null, + exprList(alias("COUNT()", aggregate("count", AstDSL.allFields()))), + emptyList(), + emptyList(), + defaultStatsArgs()))); + } + public void testTrendline() { assertEqual( "source=t | trendline sma(5, test_field) as test_field_alias sma(1, test_field_2) as" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index ec87000b5bf..be8cffffb53 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -753,6 +753,19 @@ public void testRegex() { anonymize("source=t | regex email='.*@domain.com' | fields email")); } + @Test + public void testAppendPipe() { + assertEquals( + "source=table | appendpipe [ | stats count()]", + anonymize("source=t | appendpipe [stats count()]")); + assertEquals( + "source=table | appendpipe [ | where identifier = ***]", + anonymize("source=t | appendpipe [where fieldname=='pattern']")); + assertEquals( + "source=table | appendpipe [ | sort identifier]", + anonymize("source=t | appendpipe [sort fieldname]")); + } + @Test public void testRexCommand() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); From 226d685553fc4b3fae4870ea6ab2735b7333aba1 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Wed, 12 Nov 2025 15:21:48 +0800 Subject: [PATCH 42/99] Add `regexp_replace()` function as alias of `replace()` (#4765) * Support regexp_replace() function Signed-off-by: Lantao Jin * Fix IT Signed-off-by: Lantao Jin * revert import collapse Signed-off-by: Lantao Jin * change regexp_replace() as an alias of replace() Signed-off-by: Lantao Jin * fix doctest Signed-off-by: Lantao Jin * remove invalid link in doc Signed-off-by: Lantao Jin --------- Signed-off-by: Lantao Jin --- .../sql/analysis/ExpressionAnalyzer.java | 2 +- .../sql/calcite/CalciteRelNodeVisitor.java | 2 +- .../opensearch/sql/executor/QueryService.java | 3 + .../function/BuiltinFunctionName.java | 3 +- .../expression/function/PPLFuncImpTable.java | 6 +- .../org/opensearch/sql/utils/ParseUtils.java | 2 +- docs/user/ppl/functions/condition.rst | 12 +- docs/user/ppl/functions/string.rst | 26 ++++ .../calcite/clickbench/PPLClickBenchIT.java | 17 ++- .../sql/calcite/remote/CalciteExplainIT.java | 10 +- .../calcite/remote/CalciteTextFunctionIT.java | 116 ++++++++++++++---- .../sql/legacy/SQLIntegTestCase.java | 2 +- .../sql/ppl/NewAddedCommandsIT.java | 22 +++- .../opensearch/sql/ppl/TextFunctionIT.java | 2 +- .../test/resources/clickbench/queries/q29.ppl | 7 +- .../calcite/clickbench/q29.yaml | 18 +++ ...json => explain_regexp_match_in_eval.json} | 0 ...son => explain_regexp_match_in_where.json} | 0 ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 3 +- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 3 +- .../sql/ppl/parser/AstExpressionBuilder.java | 2 + .../calcite/CalcitePPLStringFunctionTest.java | 62 +++++++--- 22 files changed, 237 insertions(+), 83 
deletions(-) rename integ-test/src/test/resources/expectedOutput/calcite/{explain_regex_match_in_eval.json => explain_regexp_match_in_eval.json} (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{explain_regex_match_in_where.json => explain_regexp_match_in_where.json} (100%) diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index be2788a547e..54300540766 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -233,7 +233,7 @@ private boolean isCalciteOnlyFunction(FunctionName functionName) { // Set of functions that are only supported with Calcite engine Set calciteOnlyFunctions = ImmutableSet.of( - BuiltinFunctionName.REGEX_MATCH.getName().getFunctionName(), + BuiltinFunctionName.REGEXP_MATCH.getName().getFunctionName(), BuiltinFunctionName.STRFTIME.getName().getFunctionName()); return calciteOnlyFunctions.stream() diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 415d47a0fd7..e2a0d7cd12d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -3434,7 +3434,7 @@ private RexNode createOptimizedSubstitution( // 3-parameter REGEXP_REPLACE return PPLFuncImpTable.INSTANCE.resolve( context.rexBuilder, - BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3, + BuiltinFunctionName.REPLACE, fieldRex, context.rexBuilder.makeLiteral(pattern), context.rexBuilder.makeLiteral(javaReplacement)); diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index 646bc8a7d5e..e4a6c5da21c 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -112,6 +112,9 @@ public void executeWithCalcite( } else { if (t instanceof Exception) { listener.onFailure((Exception) t); + } else if (t instanceof ExceptionInInitializerError + && ((ExceptionInInitializerError) t).getException() instanceof Exception) { + listener.onFailure((Exception) ((ExceptionInInitializerError) t).getException()); } else if (t instanceof VirtualMachineError) { // throw and fast fail the VM errors such as OOM (same with v2). 
throw t; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index bba86b92ae3..0fe042a5155 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -236,7 +236,7 @@ public enum BuiltinFunctionName { LTRIM(FunctionName.of("ltrim")), POSITION(FunctionName.of("position")), REGEXP(FunctionName.of("regexp")), - REGEX_MATCH(FunctionName.of("regex_match")), + REGEXP_MATCH(FunctionName.of("regexp_match")), REX_EXTRACT(FunctionName.of("REX_EXTRACT")), REX_EXTRACT_MULTI(FunctionName.of("REX_EXTRACT_MULTI")), REX_OFFSET(FunctionName.of("REX_OFFSET")), @@ -337,7 +337,6 @@ public enum BuiltinFunctionName { INTERNAL_UNCOLLECT_PATTERNS(FunctionName.of("uncollect_patterns")), INTERNAL_GROK(FunctionName.of("grok"), true), INTERNAL_PARSE(FunctionName.of("parse"), true), - INTERNAL_REGEXP_REPLACE_3(FunctionName.of("regexp_replace_3"), true), INTERNAL_REGEXP_REPLACE_PG_4(FunctionName.of("regexp_replace_pg_4"), true), INTERNAL_REGEXP_REPLACE_5(FunctionName.of("regexp_replace_5"), true), INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 406962e211e..76a6755ad52 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -83,7 +83,6 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PARSE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PATTERN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PATTERN_PARSER; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_PG_4; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_TRANSLATE3; @@ -170,7 +169,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.RANGE_BUCKET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP_MATCH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT; @@ -833,7 +832,7 @@ void populate() { // Register library operator registerOperator(REGEXP, SqlLibraryOperators.REGEXP); - registerOperator(REGEX_MATCH, SqlLibraryOperators.REGEXP_CONTAINS); + registerOperator(REGEXP_MATCH, SqlLibraryOperators.REGEXP_CONTAINS); registerOperator(CONCAT, SqlLibraryOperators.CONCAT_FUNCTION); registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS); registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS); @@ -844,7 +843,6 @@ void populate() { 
registerOperator(MD5, SqlLibraryOperators.MD5); registerOperator(SHA1, SqlLibraryOperators.SHA1); registerOperator(CRC32, SqlLibraryOperators.CRC32); - registerOperator(INTERNAL_REGEXP_REPLACE_3, SqlLibraryOperators.REGEXP_REPLACE_3); registerOperator(INTERNAL_REGEXP_REPLACE_PG_4, SqlLibraryOperators.REGEXP_REPLACE_PG_4); registerOperator(INTERNAL_REGEXP_REPLACE_5, SqlLibraryOperators.REGEXP_REPLACE_5); registerOperator(INTERNAL_TRANSLATE3, SqlLibraryOperators.TRANSLATE3); diff --git a/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java b/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java index 79f1020e3e1..c0150bbbc82 100644 --- a/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/ParseUtils.java @@ -32,7 +32,7 @@ public class ParseUtils { ImmutableMap.of( ParseMethod.REGEX, BuiltinFunctionName.INTERNAL_PARSE, ParseMethod.GROK, BuiltinFunctionName.INTERNAL_GROK, - ParseMethod.PATTERNS, BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3); + ParseMethod.PATTERNS, BuiltinFunctionName.REPLACE); /** * Construct corresponding ParseExpression by {@link ParseMethod}. diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst index 58feecdcfcb..8082d961ab5 100644 --- a/docs/user/ppl/functions/condition.rst +++ b/docs/user/ppl/functions/condition.rst @@ -571,7 +571,7 @@ Example:: | 969 | +-----+ -REGEX_MATCH ------------ +REGEXP_MATCH +------------ Description @@ -579,7 +579,7 @@ Description Version: 3.3.0 -Usage: regex_match(string, pattern) returns true if the regular expression pattern finds a match against any substring of the string value, otherwise returns false. +Usage: regexp_match(string, pattern) returns true if the regular expression pattern finds a match against any substring of the string value, otherwise returns false. The function uses Java regular expression syntax for the pattern. 
@@ -589,7 +589,7 @@ Return type: BOOLEAN Example:: - #os> source=logs | where regex_match(message, 'ERROR|WARN|FATAL') | fields timestamp, message + #os> source=logs | where regexp_match(message, 'ERROR|WARN|FATAL') | fields timestamp, message fetched rows / total rows = 3/100 +---------------------+------------------------------------------+ | timestamp | message | @@ -599,7 +599,7 @@ Example:: | 2024-01-15 10:25:33 | FATAL: System crashed unexpectedly | +---------------------+------------------------------------------+ - #os> source=users | where regex_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') | fields name, email + #os> source=users | where regexp_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') | fields name, email fetched rows / total rows = 2/3 +-------+----------------------+ | name | email | @@ -608,7 +608,7 @@ Example:: | Alice | alice@company.org | +-------+----------------------+ - #os> source=network | where regex_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regex_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') | fields ip_address, status + #os> source=network | where regexp_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regexp_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') | fields ip_address, status fetched rows / total rows = 2/10 +---------------+--------+ | ip_address | status | @@ -617,7 +617,7 @@ Example:: | 1.1.1.1 | active | +---------------+--------+ - #os> source=products | eval category = if(regex_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regex_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) | fields name, category + #os> source=products | eval category = if(regexp_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regexp_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) | fields name, category fetched rows / total rows = 4/4 +------------------------+----------+ | name | category | diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index eb82a06a055..fac59d27eca 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -415,3 +415,29 @@ Example:: |---------------------+---------------------| | HELLOWORLD | HELLOWORLD | +---------------------+---------------------+ + + +REGEXP_REPLACE +-------------- + +Description +>>>>>>>>>>> + +Usage: regexp_replace(str, pattern, replacement) replaces all substrings of the string value that match the pattern with the replacement, and returns the modified string value. 
+ +Argument type: STRING, STRING, STRING + +Return type: STRING + +Synonyms: `REPLACE`_ + +Example:: + + os> source=people | eval `DOMAIN` = REGEXP_REPLACE('https://opensearch.org/downloads/', '^https?://(?:www\.)?([^/]+)/.*$', '\1') | fields `DOMAIN` + fetched rows / total rows = 1/1 + +----------------+ + | DOMAIN | + |----------------| + | opensearch.org | + +----------------+ + diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java index 4f557e70cc8..76b4d772e0a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java @@ -8,6 +8,7 @@ import static org.opensearch.sql.util.MatcherUtils.assertYamlEqualsIgnoreId; import java.io.IOException; +import java.util.HashSet; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -51,15 +52,19 @@ public static void reset() throws IOException { System.out.println(); } - /** Ignore queries that are not supported by Calcite. */ - protected Set<Integer> ignored() { - if (GCedMemoryUsage.initialized()) { - return Set.of(29); - } else { + /** Ignore queries that are not supported. */ + protected Set<Integer> ignored() throws IOException { + Set<Integer> ignored = new HashSet<>(); + if (!isCalciteEnabled()) { + // regexp_replace() is not supported in v2 + ignored.add(29); + } + if (!GCedMemoryUsage.initialized()) { // Ignore q30 when use RuntimeMemoryUsage, // because of too much script push down, which will cause ResourceMonitor restriction. - return Set.of(29, 30); + ignored.add(30); } + return ignored; } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 841469115d5..7a454208826 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -32,7 +32,7 @@ public void init() throws Exception { super.init(); enableCalcite(); setQueryBucketSize(1000); - loadIndex(Index.BANK_WITH_STRING_VALUES); + loadIndex(Index.STRINGS); loadIndex(Index.BANK_WITH_NULL_VALUES); loadIndex(Index.NESTED_SIMPLE); loadIndex(Index.TIME_TEST_DATA); @@ -687,9 +687,9 @@ public void testStatsDistinctCountApproxFunctionExplainWithPushDown() throws IOE public void testExplainRegexMatchInWhereWithScriptPushdown() throws IOException { enabledOnlyWhenPushdownIsEnabled(); String query = - String.format("source=%s | where regex_match(name, 'hello')", TEST_INDEX_STRINGS); + String.format("source=%s | where regexp_match(name, 'hello')", TEST_INDEX_STRINGS); var result = explainQueryToString(query); - String expected = loadFromFile("expectedOutput/calcite/explain_regex_match_in_where.json"); + String expected = loadFromFile("expectedOutput/calcite/explain_regexp_match_in_where.json"); assertJsonEqualsIgnoreId(expected, result); } @@ -698,10 +698,10 @@ public void testExplainRegexMatchInEvalWithOutScriptPushdown() throws IOExceptio enabledOnlyWhenPushdownIsEnabled(); String query = String.format( - "source=%s |eval has_hello = regex_match(name, 'hello') | fields has_hello", + "source=%s |eval has_hello = regexp_match(name, 'hello') | fields has_hello", TEST_INDEX_STRINGS); var result = explainQueryToString(query); - String expected = 
loadFromFile("expectedOutput/calcite/explain_regex_match_in_eval.json"); + String expected = loadFromFile("expectedOutput/calcite/explain_regexp_match_in_eval.json"); assertJsonEqualsIgnoreId(expected, result); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java index f95f3fcc3aa..765a2caba2c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTextFunctionIT.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.util.MatcherUtils.*; import java.io.IOException; +import java.util.stream.Stream; import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.sql.ppl.TextFunctionIT; @@ -20,68 +21,128 @@ public void init() throws Exception { enableCalcite(); } + @Test + public void testReplaceAndRegexpReplace() { + Stream.of("replace", "regexp_replace") + .forEach( + func -> { + try { + // Test regexp_replace with pattern that matches substring + String query1 = + String.format( + "source=%s | eval f=%s(name, 'ell', '\1') | fields f", + TEST_INDEX_STRINGS, func); + JSONObject result1 = executeQuery(query1); + verifySchema(result1, schema("f", null, "string")); + verifyDataRows(result1, rows("h\u0001o"), rows("world"), rows("h\u0001oworld")); + + // Test regexp_replace with pattern for beginning of string + String query2 = + String.format( + "source=%s | eval f=%s(name, '^he', '\1') | fields f", + TEST_INDEX_STRINGS, func); + JSONObject result2 = executeQuery(query2); + verifySchema(result2, schema("f", null, "string")); + verifyDataRows(result2, rows("\u0001llo"), rows("world"), rows("\u0001lloworld")); + + // Test regexp_replace with pattern for end of string + String query3 = + String.format( + "source=%s | eval f=%s(name, 'ld$', '\1') | fields f", + TEST_INDEX_STRINGS, func); + JSONObject result3 = executeQuery(query3); + verifySchema(result3, schema("f", null, "string")); + verifyDataRows(result3, rows("hello"), rows("wor\u0001"), rows("hellowor\u0001")); + + // Test regexp_replace with complex pattern + String query4 = + String.format( + "source=%s | eval f=%s(name, '[hw]o.*d', '\1') | fields f", + TEST_INDEX_STRINGS, func); + JSONObject result4 = executeQuery(query4); + verifySchema(result4, schema("f", null, "string")); + verifyDataRows(result4, rows("hello"), rows("\u0001"), rows("hello\u0001")); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + @Test public void testRegexMatch() throws IOException { - // Test regex_match with pattern that matches substring - String query1 = + // be compatible with old one + String query = String.format("source=%s | eval f=regex_match(name, 'ell') | fields f", TEST_INDEX_STRINGS); + JSONObject result1 = executeQuery(query); + verifySchema(result1, schema("f", null, "boolean")); + verifyDataRows(result1, rows(true), rows(false), rows(true)); + } + + @Test + public void testRegexpMatch() throws IOException { + // Test regexp_match with pattern that matches substring + String query1 = + String.format( + "source=%s | eval f=regexp_match(name, 'ell') | fields f", TEST_INDEX_STRINGS); JSONObject result1 = executeQuery(query1); verifySchema(result1, schema("f", null, "boolean")); verifyDataRows(result1, rows(true), rows(false), rows(true)); - // Test regex_match with pattern for beginning of string + // Test regexp_match with pattern for beginning of string 
String query2 = - String.format("source=%s | eval f=regex_match(name, '^he') | fields f", TEST_INDEX_STRINGS); + String.format( + "source=%s | eval f=regexp_match(name, '^he') | fields f", TEST_INDEX_STRINGS); JSONObject result2 = executeQuery(query2); verifySchema(result2, schema("f", null, "boolean")); verifyDataRows(result2, rows(true), rows(false), rows(true)); - // Test regex_match with pattern for end of string + // Test regexp_match with pattern for end of string String query3 = - String.format("source=%s | eval f=regex_match(name, 'ld$') | fields f", TEST_INDEX_STRINGS); + String.format( + "source=%s | eval f=regexp_match(name, 'ld$') | fields f", TEST_INDEX_STRINGS); JSONObject result3 = executeQuery(query3); verifySchema(result3, schema("f", null, "boolean")); verifyDataRows(result3, rows(false), rows(true), rows(true)); - // Test regex_match with complex pattern + // Test regexp_match with complex pattern String query4 = String.format( - "source=%s | eval f=regex_match(name, '[hw]o.*d') | fields f", TEST_INDEX_STRINGS); + "source=%s | eval f=regexp_match(name, '[hw]o.*d') | fields f", TEST_INDEX_STRINGS); JSONObject result4 = executeQuery(query4); verifySchema(result4, schema("f", null, "boolean")); verifyDataRows(result4, rows(false), rows(true), rows(true)); } @Test - public void testRegexMatchWithWhereClause() throws IOException { - // Test filtering with regex_match - find strings containing 'ell' + public void testRegexpMatchWithWhereClause() throws IOException { + // Test filtering with regexp_match - find strings containing 'ell' String query1 = String.format( - "source=%s | where regex_match(name, 'ell') | fields name", TEST_INDEX_STRINGS); + "source=%s | where regexp_match(name, 'ell') | fields name", TEST_INDEX_STRINGS); JSONObject result1 = executeQuery(query1); verifySchema(result1, schema("name", null, "string")); verifyDataRows(result1, rows("hello"), rows("helloworld")); - // Test filtering with regex_match - find strings starting with 'h' + // Test filtering with regexp_match - find strings starting with 'h' String query2 = String.format( - "source=%s | where regex_match(name, '^h') | fields name", TEST_INDEX_STRINGS); + "source=%s | where regexp_match(name, '^h') | fields name", TEST_INDEX_STRINGS); JSONObject result2 = executeQuery(query2); verifySchema(result2, schema("name", null, "string")); verifyDataRows(result2, rows("hello"), rows("helloworld")); - // Test filtering with regex_match - find strings ending with 'ld' + // Test filtering with regexp_match - find strings ending with 'ld' String query3 = String.format( - "source=%s | where regex_match(name, 'ld$') | fields name", TEST_INDEX_STRINGS); + "source=%s | where regexp_match(name, 'ld$') | fields name", TEST_INDEX_STRINGS); JSONObject result3 = executeQuery(query3); verifySchema(result3, schema("name", null, "string")); verifyDataRows(result3, rows("world"), rows("helloworld")); - // Test NOT regex_match - find strings NOT containing 'o' + // Test NOT regexp_match - find strings NOT containing 'o' String query4 = String.format( - "source=%s | where NOT regex_match(name, 'o') | fields name", TEST_INDEX_STRINGS); + "source=%s | where NOT regexp_match(name, 'o') | fields name", TEST_INDEX_STRINGS); JSONObject result4 = executeQuery(query4); verifySchema(result4, schema("name", null, "string")); // No rows should match since all strings contain 'o' @@ -89,11 +150,11 @@ public void testRegexMatchWithWhereClause() throws IOException { } @Test - public void testRegexMatchWithComplexPatterns() throws 
IOException { + public void testRegexpMatchWithComplexPatterns() throws IOException { // Test regex with alternation - match strings containing either 'hello' or 'world' String query1 = String.format( - "source=%s | where regex_match(name, '(hello|world)') | fields name | head 3", + "source=%s | where regexp_match(name, '(hello|world)') | fields name | head 3", TEST_INDEX_STRINGS); JSONObject result1 = executeQuery(query1); verifySchema(result1, schema("name", null, "string")); @@ -102,7 +163,7 @@ public void testRegexMatchWithComplexPatterns() throws IOException { // Test regex with word boundary - exact word match String query2 = String.format( - "source=%s | where regex_match(name, '\\\\bhello\\\\b') | fields name", + "source=%s | where regexp_match(name, '\\\\bhello\\\\b') | fields name", TEST_INDEX_STRINGS); JSONObject result2 = executeQuery(query2); verifySchema(result2, schema("name", null, "string")); @@ -111,18 +172,18 @@ public void testRegexMatchWithComplexPatterns() throws IOException { // Test regex with quantifiers - at least 5 characters String query3 = String.format( - "source=%s | where regex_match(name, '^.{5,}$') | fields name", TEST_INDEX_STRINGS); + "source=%s | where regexp_match(name, '^.{5,}$') | fields name", TEST_INDEX_STRINGS); JSONObject result3 = executeQuery(query3); verifySchema(result3, schema("name", null, "string")); verifyDataRows(result3, rows("hello"), rows("world"), rows("helloworld")); } @Test - public void testRegexMatchInEvalWithConditions() throws IOException { - // Test regex_match in IF condition + public void testRegexpMatchInEvalWithConditions() throws IOException { + // Test regexp_match in IF condition String query1 = String.format( - "source=%s | eval category = if(regex_match(name, '^h'), 'starts_with_h', 'other') |" + "source=%s | eval category = if(regexp_match(name, '^h'), 'starts_with_h', 'other') |" + " fields name, category", TEST_INDEX_STRINGS); JSONObject result1 = executeQuery(query1); @@ -133,11 +194,12 @@ public void testRegexMatchInEvalWithConditions() throws IOException { rows("world", "other"), rows("helloworld", "starts_with_h")); - // Test combining regex_match results + // Test combining regexp_match results String query2 = String.format( - "source=%s | eval has_hello = regex_match(name, 'hello'), has_world = regex_match(name," - + " 'world') | where has_hello OR has_world | fields name, has_hello, has_world", + "source=%s | eval has_hello = regexp_match(name, 'hello'), has_world =" + + " regexp_match(name, 'world') | where has_hello OR has_world | fields name," + + " has_hello, has_world", TEST_INDEX_STRINGS); JSONObject result2 = executeQuery(query2); verifySchema( diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index ee979900c48..47632dbc942 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -636,7 +636,7 @@ public enum Index { "account_null", getBankWithNullValuesIndexMapping(), "src/test/resources/bank_with_null_values.json"), - BANK_WITH_STRING_VALUES( + STRINGS( TestsConstants.TEST_INDEX_STRINGS, "strings", getStringIndexMapping(), diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index 399839c26ff..93e9af8e2a5 100644 --- 
a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java
@@ -24,7 +24,7 @@ public void init() throws Exception {
     super.init();
     loadIndex(Index.BANK);
     loadIndex(Index.DOG);
-    loadIndex(Index.BANK_WITH_STRING_VALUES);
+    loadIndex(Index.STRINGS);
   }
 
   @Test
@@ -123,12 +123,26 @@ public void testAppendcol() throws IOException {
   }
 
   @Test
-  public void testRegexMatch() throws IOException {
-    // Test regex_match with pattern that matches substring
+  public void testRegexpMatch() throws IOException {
+    // Test regexp_match with pattern that matches substring
     try {
       executeQuery(
           String.format(
-              "source=%s | eval f=regex_match(name, 'ell') | fields f", TEST_INDEX_STRINGS));
+              "source=%s | eval f=regexp_match(name, 'ell') | fields f", TEST_INDEX_STRINGS));
+    } catch (ResponseException e) {
+      JSONObject result = new JSONObject(TestUtils.getResponseBody(e.getResponse()));
+      verifyQuery(result);
+    }
+  }
+
+  @Test
+  public void testRegexpReplace() throws IOException {
+    // Test regexp_replace with pattern that matches substring
+    try {
+      executeQuery(
+          String.format(
+              "source=%s | eval f=regexp_replace(name, 'ell', '\1') | fields f",
+              TEST_INDEX_STRINGS));
     } catch (ResponseException e) {
       JSONObject result = new JSONObject(TestUtils.getResponseBody(e.getResponse()));
       verifyQuery(result);
     }
diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/TextFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/TextFunctionIT.java
index 5e1f0ef95a4..7fe360d8844 100644
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/TextFunctionIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/ppl/TextFunctionIT.java
@@ -20,7 +20,7 @@ public class TextFunctionIT extends PPLIntegTestCase {
   public void init() throws Exception {
     super.init();
     loadIndex(Index.BANK);
-    loadIndex(Index.BANK_WITH_STRING_VALUES);
+    loadIndex(Index.STRINGS);
   }
 
   void verifyQuery(
diff --git a/integ-test/src/test/resources/clickbench/queries/q29.ppl b/integ-test/src/test/resources/clickbench/queries/q29.ppl
index b9f340ed125..8c86b8997ce 100644
--- a/integ-test/src/test/resources/clickbench/queries/q29.ppl
+++ b/integ-test/src/test/resources/clickbench/queries/q29.ppl
@@ -3,9 +3,12 @@ SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k,
 AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer)
 FROM hits
 WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
 */
+/*
+OpenSearch accepts JSON as the REST request payload, so convert \. to \\.
and \1 to \\1 +*/ source=hits -| Referer != '' -| eval k = regexp_replace(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') +| where Referer != '' +| eval k = regexp_replace(Referer, '^https?://(?:www\\.)?([^/]+)/.*$', '\\1') | stats bucket_nullable=false avg(length(Referer)) as l, count() as c, min(Referer) by k | where c > 100000 | sort - l diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml index e69de29bb2d..9a7638f1811 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml @@ -0,0 +1,18 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$0], dir0=[DESC-nulls-last], fetch=[25]) + LogicalFilter(condition=[>($1, 100000)]) + LogicalProject(l=[$1], c=[$2], min(Referer)=[$3], k=[$0]) + LogicalAggregate(group=[{0}], l=[AVG($2)], c=[COUNT()], min(Referer)=[MIN($1)]) + LogicalProject(k=[$111], Referer=[$95], $f3=[CHAR_LENGTH($95)]) + LogicalFilter(condition=[IS NOT NULL($111)]) + LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], k=[REGEXP_REPLACE($95, '^https?://(?:www\.)?([^/]+)/.*$':VARCHAR, '$1')]) + LogicalFilter(condition=[<>($95, '')]) + CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) + physical: | + EnumerableLimit(fetch=[10000]) + 
EnumerableLimit(fetch=[25]) + EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[100000], expr#5=[>($t1, $t4)], proj#0..3=[{exprs}], $condition=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[Referer], SCRIPT->AND(<>($0, ''), IS NOT NULL(REGEXP_REPLACE($0, '^https?://(?:www\.)?([^/]+)/.*$':VARCHAR, '$1'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($2),c=COUNT(),min(Referer)=MIN($1)), PROJECT->[l, c, min(Referer), k]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"Referer","boost":1.0}}],"must_not":[{"term":{"Referer":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJSZWZlcmVyIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AsN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiSVMgTk9UIE5VTEwiLAogICAgImtpbmQiOiAiSVNfTk9UX05VTEwiLAogICAgInN5bnRheCI6ICJQT1NURklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiXmh0dHBzPzovLyg/Ond3d1xcLik/KFteL10rKS8uKiQiLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6ICIkMSIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMgogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQAB1JlZmVyZXJ+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAZTVFJJTkd4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["Referer"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"k":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJSZWZlcmVyIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Adp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJeaHR0cHM/Oi8vKD86d3d3XFwuKT8oW14vXSspLy4qJCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICIkMSIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAyCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdSZWZlcmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAGU1RSSU5HeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJSZWZlcmVyIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AKZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAHUmVmZXJlcn5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABlNUUklOR3h4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}},"min(Referer)":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"Referer"}],"sort":[{"Referer":{"order":"asc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_match_in_eval.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_eval.json similarity index 100% rename from integ-test/src/test/resources/expectedOutput/calcite/explain_regex_match_in_eval.json rename to integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_eval.json diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_match_in_where.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.json similarity index 100% rename from integ-test/src/test/resources/expectedOutput/calcite/explain_regex_match_in_where.json rename to 
integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.json diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index bbd0ba94b41..8d64bfabb1f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -166,7 +166,8 @@ XOR: 'XOR'; TRUE: 'TRUE'; FALSE: 'FALSE'; REGEXP: 'REGEXP'; -REGEX_MATCH: 'REGEX_MATCH'; +REGEXP_MATCH: 'REGEXP_MATCH' | 'REGEX_MATCH'; +REGEXP_REPLACE: 'REGEXP_REPLACE'; // DATETIME, INTERVAL AND UNIT KEYWORDS CONVERT_TZ: 'CONVERT_TZ'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index dd9442995ad..fd6eb26fe7a 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1264,7 +1264,7 @@ conditionFunctionName | ISNULL | ISNOTNULL | CIDRMATCH - | REGEX_MATCH + | REGEXP_MATCH | JSON_VALID | ISPRESENT | ISEMPTY @@ -1305,6 +1305,7 @@ textFunctionName | REPLACE | REVERSE | TONUMBER + | REGEXP_REPLACE ; positionFunctionName diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 5dc1bf44d86..c18d558ffc7 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -85,6 +85,8 @@ public class AstExpressionBuilder extends OpenSearchPPLParserBaseVisitor() .put("isnull", IS_NULL.getName().getFunctionName()) .put("isnotnull", IS_NOT_NULL.getName().getFunctionName()) + .put("regex_match", REGEXP_MATCH.getName().getFunctionName()) // compatible with old one + .put("regexp_replace", REPLACE.getName().getFunctionName()) .build(); private final AstBuilder astBuilder; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java index f67f7601dfe..3664b273d09 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java @@ -301,7 +301,29 @@ public void testLike() { } @Test - public void testRegexMatch() { + public void testRegexpMatch() { + String ppl = "source=EMP | where regexp_match(ENAME, '^[A-C]') | fields ENAME"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalProject(ENAME=[$1])\n" + + " LogicalFilter(condition=[REGEXP_CONTAINS($1, '^[A-C]':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "" + "ENAME=ALLEN\n" + "ENAME=BLAKE\n" + "ENAME=CLARK\n" + "ENAME=ADAMS\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "" + + "SELECT `ENAME`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE REGEXP_CONTAINS(`ENAME`, '^[A-C]')"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testRegexMatchCompatibility() { String ppl = "source=EMP | where regex_match(ENAME, '^[A-C]') | fields ENAME"; RelNode root = getRelNode(ppl); String expectedLogical = @@ -323,8 +345,8 @@ public void testRegexMatch() { } @Test - public void testRegexMatchWithPattern() { - String ppl = "source=EMP | eval matches = regex_match(JOB, 'MAN.*') | fields JOB, matches"; + public void testRegexpMatchWithPattern() { + String ppl = "source=EMP | eval matches = regexp_match(JOB, 'MAN.*') | fields JOB, matches"; 
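Both spellings reach the same implementation because AstExpressionBuilder, earlier in this patch, normalizes function names through an alias map ("regex_match" stays usable, and "regexp_replace" is routed onto the existing REPLACE). A self-contained sketch of that lookup pattern; the class and member names here are illustrative, not the builder's actual fields:

```java
import java.util.Locale;
import java.util.Map;

public class FunctionNameAliases {
  // Illustrative alias table mirroring the two entries added in this patch.
  private static final Map<String, String> ALIASES =
      Map.of(
          "regex_match", "regexp_match", // keep the legacy spelling working
          "regexp_replace", "replace"); // regexp_replace reuses REPLACE

  static String canonicalize(String name) {
    String lower = name.toLowerCase(Locale.ROOT);
    return ALIASES.getOrDefault(lower, lower);
  }

  public static void main(String[] args) {
    System.out.println(canonicalize("REGEX_MATCH")); // regexp_match
    System.out.println(canonicalize("regexp_replace")); // replace
    System.out.println(canonicalize("upper")); // upper
  }
}
```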
RelNode root = getRelNode(ppl); String expectedLogical = "" @@ -338,9 +360,9 @@ public void testRegexMatchWithPattern() { } @Test - public void testRegexMatchInEval() { + public void testRegexpMatchInEval() { String ppl = - "source=EMP | eval result = if(regex_match(ENAME, '^S'), 1, 0) | where result = 1 | fields" + "source=EMP | eval result = if(regexp_match(ENAME, '^S'), 1, 0) | where result = 1 | fields" + " ENAME"; RelNode root = getRelNode(ppl); String expectedLogical = @@ -364,9 +386,9 @@ public void testRegexMatchInEval() { } @Test - public void testRegexMatchInWhereClause() { + public void testRegexpMatchInWhereClause() { // Test with WHERE clause to filter employees with names ending in 'ES' - String ppl = "source=EMP | where regex_match(ENAME, 'ES$') | fields ENAME, JOB"; + String ppl = "source=EMP | where regexp_match(ENAME, 'ES$') | fields ENAME, JOB"; RelNode root = getRelNode(ppl); String expectedLogical = "" @@ -386,10 +408,10 @@ public void testRegexMatchInWhereClause() { } @Test - public void testRegexMatchWithJobPattern() { + public void testRegexpMatchWithJobPattern() { // Test filtering ANALYST and MANAGER positions using regex String ppl = - "source=EMP | where regex_match(JOB, '(ANALYST|MANAGER)') | fields ENAME, JOB, SAL"; + "source=EMP | where regexp_match(JOB, '(ANALYST|MANAGER)') | fields ENAME, JOB, SAL"; RelNode root = getRelNode(ppl); String expectedLogical = "" @@ -408,9 +430,9 @@ public void testRegexMatchWithJobPattern() { } @Test - public void testRegexMatchCaseInsensitive() { + public void testRegexpMatchCaseInsensitive() { // Test case-insensitive pattern matching - String ppl = "source=EMP | where regex_match(ENAME, '(?i)^[m-s]') | fields ENAME | head 5"; + String ppl = "source=EMP | where regexp_match(ENAME, '(?i)^[m-s]') | fields ENAME | head 5"; RelNode root = getRelNode(ppl); String expectedLogical = "" @@ -431,10 +453,10 @@ public void testRegexMatchCaseInsensitive() { } @Test - public void testRegexMatchWithMultipleConditions() { - // Test combining regex_match with other conditions + public void testRegexpMatchWithMultipleConditions() { + // Test combining regexp_match with other conditions String ppl = - "source=EMP | where regex_match(JOB, 'CLERK') AND SAL > 1000 | fields ENAME, JOB, SAL"; + "source=EMP | where regexp_match(JOB, 'CLERK') AND SAL > 1000 | fields ENAME, JOB, SAL"; RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(ENAME=[$1], JOB=[$2], SAL=[$5])\n" @@ -453,9 +475,9 @@ public void testRegexMatchWithMultipleConditions() { } @Test - public void testRegexMatchNegation() { - // Test NOT regex_match pattern - String ppl = "source=EMP | where NOT regex_match(JOB, 'CLERK|SALESMAN') | fields ENAME, JOB"; + public void testRegexpMatchNegation() { + // Test NOT regexp_match pattern + String ppl = "source=EMP | where NOT regexp_match(JOB, 'CLERK|SALESMAN') | fields ENAME, JOB"; RelNode root = getRelNode(ppl); String expectedLogical = "" @@ -474,10 +496,10 @@ public void testRegexMatchNegation() { } @Test - public void testRegexMatchWithStats() { - // Test regex_match with aggregation + public void testRegexpMatchWithStats() { + // Test regexp_match with aggregation String ppl = - "source=EMP | where regex_match(JOB, 'MAN') | stats count() as manager_count, avg(SAL) as" + "source=EMP | where regexp_match(JOB, 'MAN') | stats count() as manager_count, avg(SAL) as" + " avg_salary"; RelNode root = getRelNode(ppl); String expectedLogical = From 0050b31e3951b697729bc562fbd5e13826e782b0 Mon Sep 17 00:00:00 2001 From: Lantao Jin 
Date: Wed, 12 Nov 2025 18:23:07 +0800 Subject: [PATCH 43/99] Translate SAFE_CAST to TRY_CAST in Spark SQL (#4788) Signed-off-by: Lantao Jin --- .../calcite/CalcitePPLCastFunctionTest.java | 10 +-- .../sql/ppl/calcite/CalcitePPLChartTest.java | 8 +- ...alcitePPLEventstatsEarliestLatestTest.java | 18 ++--- .../ppl/calcite/CalcitePPLPatternsTest.java | 76 ++++++++----------- .../CalcitePPLStatsEarliestLatestTest.java | 14 ++-- .../calcite/CalcitePPLStringFunctionTest.java | 2 +- .../calcite/OpenSearchSparkSqlDialect.java | 25 ++++-- 7 files changed, 75 insertions(+), 78 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCastFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCastFunctionTest.java index 3a0da79657f..95fc53b588f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCastFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCastFunctionTest.java @@ -28,7 +28,7 @@ public void testCast() { verifyLogical(root, expectedLogical); // TODO there is no SAFE_CAST() in Spark, the Spark CAST is always safe (return null). - String expectedSparkSql = "SELECT SAFE_CAST(`MGR` AS STRING) `a`\nFROM `scott`.`EMP`"; + String expectedSparkSql = "SELECT TRY_CAST(`MGR` AS STRING) `a`\nFROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -40,7 +40,7 @@ public void testCastInsensitive() { "" + "LogicalProject(a=[SAFE_CAST($3)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedSparkSql = "SELECT SAFE_CAST(`MGR` AS STRING) `a`\nFROM `scott`.`EMP`"; + String expectedSparkSql = "SELECT TRY_CAST(`MGR` AS STRING) `a`\nFROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -56,7 +56,7 @@ public void testCastOverriding() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " SAFE_CAST(`MGR` AS STRING) `age`\n" + + " TRY_CAST(`MGR` AS STRING) `age`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -83,7 +83,7 @@ public void testChainedCast() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "" + "SELECT SAFE_CAST(SAFE_CAST(`MGR` AS STRING) AS INTEGER) `a`\n" + "FROM `scott`.`EMP`"; + "" + "SELECT TRY_CAST(TRY_CAST(`MGR` AS STRING) AS INTEGER) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -117,7 +117,7 @@ public void testChainedCast2() { String expectedSparkSql = "" - + "SELECT SAFE_CAST(CONCAT(SAFE_CAST(`MGR` AS STRING), '0') AS INTEGER) `a`\n" + + "SELECT TRY_CAST(CONCAT(TRY_CAST(`MGR` AS STRING), '0') AS INTEGER) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java index 338b586ba29..bddcde11e18 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java @@ -111,14 +111,14 @@ public void testChartWithMultipleGroupKeys() { "SELECT `t2`.`gender`, CASE WHEN `t2`.`age` IS NULL THEN 'NULL' WHEN" + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`age` ELSE 'OTHER' END `age`," + " AVG(`t2`.`avg(balance)`) `avg(balance)`\n" - + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + "FROM (SELECT `gender`, TRY_CAST(`age` AS STRING) `age`, AVG(`balance`)" + " `avg(balance)`\n" + 
"FROM `scott`.`bank`\n" + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + "GROUP BY `gender`, `age`) `t2`\n" + "LEFT JOIN (SELECT `age`, SUM(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + " (ORDER BY SUM(`avg(balance)`) DESC) `_row_number_chart_`\n" - + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + + "FROM (SELECT TRY_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + "GROUP BY `gender`, `age`) `t6`\n" @@ -139,14 +139,14 @@ public void testChartWithMultipleGroupKeysAlternativeSyntax() { "SELECT `t2`.`gender`, CASE WHEN `t2`.`age` IS NULL THEN 'NULL' WHEN" + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`age` ELSE 'OTHER' END `age`," + " AVG(`t2`.`avg(balance)`) `avg(balance)`\n" - + "FROM (SELECT `gender`, SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`)" + + "FROM (SELECT `gender`, TRY_CAST(`age` AS STRING) `age`, AVG(`balance`)" + " `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + "GROUP BY `gender`, `age`) `t2`\n" + "LEFT JOIN (SELECT `age`, SUM(`avg(balance)`) `__grand_total__`, ROW_NUMBER() OVER" + " (ORDER BY SUM(`avg(balance)`) DESC) `_row_number_chart_`\n" - + "FROM (SELECT SAFE_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + + "FROM (SELECT TRY_CAST(`age` AS STRING) `age`, AVG(`balance`) `avg(balance)`\n" + "FROM `scott`.`bank`\n" + "WHERE `gender` IS NOT NULL AND `balance` IS NOT NULL\n" + "GROUP BY `gender`, `age`) `t6`\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsEarliestLatestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsEarliestLatestTest.java index d91a8638cb2..f76a7af2c79 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsEarliestLatestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsEarliestLatestTest.java @@ -48,7 +48,7 @@ public void testEventstatsEarliestWithoutSecondArgument() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY(`message`," + " `@timestamp`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)" + " `earliest_message`\n" + "FROM `POST`.`LOGS`"; @@ -66,7 +66,7 @@ public void testEventstatsLatestWithoutSecondArgument() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MAX_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MAX_BY(`message`," + " `@timestamp`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)" + " `latest_message`\n" + "FROM `POST`.`LOGS`"; @@ -84,7 +84,7 @@ public void testEventstatsEarliestByServerWithoutSecondArgument() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY(`message`," + " `@timestamp`) OVER (PARTITION BY `server` RANGE BETWEEN UNBOUNDED PRECEDING AND" + " UNBOUNDED FOLLOWING) `earliest_message`\n" + "FROM `POST`.`LOGS`"; @@ -102,7 +102,7 @@ public void testEventstatsLatestByServerWithoutSecondArgument() { verifyLogical(root, expectedLogical); String expectedSparkSql = - 
"SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MAX_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MAX_BY(`message`," + " `@timestamp`) OVER (PARTITION BY `server` RANGE BETWEEN UNBOUNDED PRECEDING AND" + " UNBOUNDED FOLLOWING) `latest_message`\n" + "FROM `POST`.`LOGS`"; @@ -122,7 +122,7 @@ public void testEventstatsEarliestWithOtherAggregatesWithoutSecondArgument() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY(`message`," + " `@timestamp`) OVER (PARTITION BY `server` RANGE BETWEEN UNBOUNDED PRECEDING AND" + " UNBOUNDED FOLLOWING) `earliest_message`, COUNT(*) OVER (PARTITION BY `server` RANGE" + " BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) `cnt`\n" @@ -141,7 +141,7 @@ public void testEventstatsEarliestWithExplicitTimestampField() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY(`message`," + " `created_at`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)" + " `earliest_message`\n" + "FROM `POST`.`LOGS`"; @@ -159,7 +159,7 @@ public void testEventstatsLatestWithExplicitTimestampField() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MAX_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MAX_BY(`message`," + " `created_at`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)" + " `latest_message`\n" + "FROM `POST`.`LOGS`"; @@ -180,9 +180,9 @@ public void testEventstatsEarliestLatestCombined() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY (`message`," + "SELECT `server`, `level`, `message`, `@timestamp`, `created_at`, MIN_BY(`message`," + " `@timestamp`) OVER (PARTITION BY `server` RANGE BETWEEN UNBOUNDED PRECEDING AND" - + " UNBOUNDED FOLLOWING) `earliest_msg`, MAX_BY (`message`, `@timestamp`) OVER" + + " UNBOUNDED FOLLOWING) `earliest_msg`, MAX_BY(`message`, `@timestamp`) OVER" + " (PARTITION BY `server` RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)" + " `latest_msg`\n" + "FROM `POST`.`LOGS`"; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java index d72c3b086cc..f1dfd930a82 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java @@ -64,9 +64,9 @@ public void testPatternsLabelMode_ShowNumberedToken_ForSimplePatternMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['pattern'] AS" - + " STRING) `patterns_field`, SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR" + + " STRING) `patterns_field`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR" + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, 
'[a-zA-Z0-9]+', '<*>') END," + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -91,9 +91,9 @@ public void testPatternsLabelModeWithCustomPattern_ShowNumberedToken_ForSimplePa verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['pattern'] AS STRING)" - + " `patterns_field`, SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + + " `patterns_field`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['tokens'] AS" + " MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -138,9 +138,9 @@ public void testPatternsLabelModeWithPartitionBy_ShowNumberedToken_SimplePattern verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME`" + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME`" + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END," - + " `ENAME`)['pattern'] AS STRING) `patterns_field`, SAFE_CAST(`PATTERN_PARSER`(CASE" + + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(`PATTERN_PARSER`(CASE" + " WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," + " '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" + " `tokens`\n" @@ -160,7 +160,7 @@ public void testPatternsLabelMode_NotShowNumberedToken_ForBrainMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, SAFE_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, FALSE)" + "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, FALSE)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), FALSE)['pattern']" + " AS STRING) `patterns_field`\n" + "FROM `scott`.`EMP`"; @@ -183,9 +183,9 @@ public void testPatternsLabelMode_ShowNumberedToken_ForBrainMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, SAFE_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE)" + "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), TRUE)['pattern']" - + " AS STRING) `patterns_field`, SAFE_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`," + + " AS STRING) `patterns_field`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`," + " 10, 100000, TRUE) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + " TRUE)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -206,7 +206,7 @@ public void testPatternsLabelModeWithPartitionBy_NotShowNumberedToken_ForBrainMe verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, SAFE_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10," + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10," + " 100000, FALSE) OVER (PARTITION BY `DEPTNO` RANGE BETWEEN UNBOUNDED PRECEDING AND" + " UNBOUNDED FOLLOWING), FALSE)['pattern'] AS STRING) `patterns_field`\n" + "FROM `scott`.`EMP`"; @@ -229,10 +229,10 @@ public void 
testPatternsLabelModeWithPartitionBy_ShowNumberedToken_ForBrainMetho verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, SAFE_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10," + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10," + " 100000, TRUE) OVER (PARTITION BY `DEPTNO` RANGE BETWEEN UNBOUNDED PRECEDING AND" + " UNBOUNDED FOLLOWING), TRUE)['pattern'] AS STRING) `patterns_field`," - + " SAFE_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE) OVER" + + " TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE) OVER" + " (PARTITION BY `DEPTNO` RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + " TRUE)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -281,11 +281,11 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForSimplePatternMethod verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + "SELECT TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['pattern']" + " AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END) `pattern_count`," - + " SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + + " TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['tokens']" + " AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" @@ -312,11 +312,11 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `DEPTNO`, SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + "SELECT `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)" - + " `pattern_count`, SAFE_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + + " `pattern_count`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + " 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10)" + " `sample_logs`\n" @@ -344,14 +344,10 @@ public void testPatternsAggregationMode_NotShowNumberedToken_ForBrainMethod() { + " LogicalValues(tuples=[[{ 0 }]])\n"; verifyLogical(root, expectedLogical); - /* - * TODO: Fix Spark SQL conformance - * Spark doesn't have SAFE_CAST and UNNEST - */ String expectedSparkSql = - "SELECT SAFE_CAST(`t20`.`patterns_field`['pattern'] AS STRING) `patterns_field`," - + " SAFE_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT) `pattern_count`," - + " SAFE_CAST(`t20`.`patterns_field`['sample_logs'] AS ARRAY< STRING >) `sample_logs`\n" + "SELECT TRY_CAST(`t20`.`patterns_field`['pattern'] AS STRING) `patterns_field`," + + " TRY_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT) `pattern_count`," + + " TRY_CAST(`t20`.`patterns_field`['sample_logs'] 
AS ARRAY< STRING >) `sample_logs`\n" + "FROM (SELECT `pattern`(`ENAME`, 10, 100000, FALSE) `patterns_field`\n" + "FROM `scott`.`EMP`) `$cor0`,\n" + "LATERAL UNNEST((SELECT `$cor0`.`patterns_field`\n" @@ -378,15 +374,11 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForBrainMethod() { + " LogicalValues(tuples=[[{ 0 }]])\n"; verifyLogical(root, expectedLogical); - /* - * TODO: Fix Spark SQL conformance - * Spark doesn't have SAFE_CAST and UNNEST - */ String expectedSparkSql = - "SELECT SAFE_CAST(`t20`.`patterns_field`['pattern'] AS STRING) `patterns_field`," - + " SAFE_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT) `pattern_count`," - + " SAFE_CAST(`t20`.`patterns_field`['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" - + " `tokens`, SAFE_CAST(`t20`.`patterns_field`['sample_logs'] AS ARRAY< STRING >)" + "SELECT TRY_CAST(`t20`.`patterns_field`['pattern'] AS STRING) `patterns_field`," + + " TRY_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT) `pattern_count`," + + " TRY_CAST(`t20`.`patterns_field`['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" + + " `tokens`, TRY_CAST(`t20`.`patterns_field`['sample_logs'] AS ARRAY< STRING >)" + " `sample_logs`\n" + "FROM (SELECT `pattern`(`ENAME`, 10, 100000, TRUE) `patterns_field`\n" + "FROM `scott`.`EMP`) `$cor0`,\n" @@ -414,14 +406,10 @@ public void testPatternsAggregationModeWithGroupBy_NotShowNumberedToken_ForBrain + " LogicalValues(tuples=[[{ 0 }]])\n"; verifyLogical(root, expectedLogical); - /* - * TODO: Fix Spark SQL conformance - * Spark doesn't have SAFE_CAST and UNNEST - */ String expectedSparkSql = - "SELECT `$cor0`.`DEPTNO`, SAFE_CAST(`t20`.`patterns_field`['pattern'] AS STRING)" - + " `patterns_field`, SAFE_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT)" - + " `pattern_count`, SAFE_CAST(`t20`.`patterns_field`['sample_logs'] AS ARRAY< STRING" + "SELECT `$cor0`.`DEPTNO`, TRY_CAST(`t20`.`patterns_field`['pattern'] AS STRING)" + + " `patterns_field`, TRY_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT)" + + " `pattern_count`, TRY_CAST(`t20`.`patterns_field`['sample_logs'] AS ARRAY< STRING" + " >) `sample_logs`\n" + "FROM (SELECT `DEPTNO`, `pattern`(`ENAME`, 10, 100000, FALSE) `patterns_field`\n" + "FROM `scott`.`EMP`\n" @@ -451,15 +439,11 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForBrainMet + " LogicalValues(tuples=[[{ 0 }]])\n"; verifyLogical(root, expectedLogical); - /* - * TODO: Fix Spark SQL conformance - * Spark doesn't have SAFE_CAST and UNNEST - */ String expectedSparkSql = - "SELECT `$cor0`.`DEPTNO`, SAFE_CAST(`t20`.`patterns_field`['pattern'] AS STRING)" - + " `patterns_field`, SAFE_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT)" - + " `pattern_count`, SAFE_CAST(`t20`.`patterns_field`['tokens'] AS MAP< VARCHAR," - + " VARCHAR ARRAY >) `tokens`, SAFE_CAST(`t20`.`patterns_field`['sample_logs'] AS" + "SELECT `$cor0`.`DEPTNO`, TRY_CAST(`t20`.`patterns_field`['pattern'] AS STRING)" + + " `patterns_field`, TRY_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT)" + + " `pattern_count`, TRY_CAST(`t20`.`patterns_field`['tokens'] AS MAP< VARCHAR," + + " VARCHAR ARRAY >) `tokens`, TRY_CAST(`t20`.`patterns_field`['sample_logs'] AS" + " ARRAY< STRING >) `sample_logs`\n" + "FROM (SELECT `DEPTNO`, `pattern`(`ENAME`, 10, 100000, TRUE) `patterns_field`\n" + "FROM `scott`.`EMP`\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStatsEarliestLatestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStatsEarliestLatestTest.java index 
f5ee3780411..cba3942da3a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStatsEarliestLatestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStatsEarliestLatestTest.java @@ -51,7 +51,7 @@ public void testEarliestWithoutSecondArgument() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT MIN_BY (`message`, `@timestamp`) `earliest_message`\n" + "FROM `POST`.`LOGS`"; + "SELECT MIN_BY(`message`, `@timestamp`) `earliest_message`\n" + "FROM `POST`.`LOGS`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -69,7 +69,7 @@ public void testLatestWithoutSecondArgument() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT MAX_BY (`message`, `@timestamp`) `latest_message`\n" + "FROM `POST`.`LOGS`"; + "SELECT MAX_BY(`message`, `@timestamp`) `latest_message`\n" + "FROM `POST`.`LOGS`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -91,7 +91,7 @@ public void testEarliestByServerWithoutSecondArgument() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT MIN_BY (`message`, `@timestamp`) `earliest_message`, `server`\n" + "SELECT MIN_BY(`message`, `@timestamp`) `earliest_message`, `server`\n" + "FROM `POST`.`LOGS`\n" + "GROUP BY `server`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -115,7 +115,7 @@ public void testLatestByServerWithoutSecondArgument() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT MAX_BY (`message`, `@timestamp`) `latest_message`, `server`\n" + "SELECT MAX_BY(`message`, `@timestamp`) `latest_message`, `server`\n" + "FROM `POST`.`LOGS`\n" + "GROUP BY `server`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -140,7 +140,7 @@ public void testEarliestWithOtherAggregatesWithoutSecondArgument() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT MIN_BY (`message`, `@timestamp`) `earliest_message`, " + "SELECT MIN_BY(`message`, `@timestamp`) `earliest_message`, " + "COUNT(*) `cnt`, `server`\n" + "FROM `POST`.`LOGS`\n" + "GROUP BY `server`"; @@ -161,7 +161,7 @@ public void testEarliestWithExplicitTimestampField() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT MIN_BY (`message`, `created_at`) `earliest_message`\n" + "FROM `POST`.`LOGS`"; + "SELECT MIN_BY(`message`, `created_at`) `earliest_message`\n" + "FROM `POST`.`LOGS`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -179,7 +179,7 @@ public void testLatestWithExplicitTimestampField() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT MAX_BY (`message`, `created_at`) `latest_message`\n" + "FROM `POST`.`LOGS`"; + "SELECT MAX_BY(`message`, `created_at`) `latest_message`\n" + "FROM `POST`.`LOGS`"; verifyPPLToSparkSQL(root, expectedSparkSql); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java index 3664b273d09..ba2169bab6d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java @@ -76,7 +76,7 @@ public void testToStringFormatNotSpecified() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT CAST(`MGR` AS STRING) `string_value`, SAFE_CAST(`MGR` AS STRING) `cast_value`\n" + "SELECT CAST(`MGR` AS STRING) `string_value`, TRY_CAST(`MGR` AS STRING) `cast_value`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git 
a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java
index 24ddedd2562..2d044da58e6 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java
@@ -24,7 +24,10 @@ public class OpenSearchSparkSqlDialect extends SparkSqlDialect {
   private static final Map<String, String> CALCITE_TO_SPARK_MAPPING =
       ImmutableMap.of(
           "ARG_MIN", "MIN_BY",
-          "ARG_MAX", "MAX_BY");
+          "ARG_MAX", "MAX_BY",
+          "SAFE_CAST", "TRY_CAST");
+
+  private static final Map<String, String> CALL_SEPARATOR = ImmutableMap.of("SAFE_CAST", "AS");
 
   private OpenSearchSparkSqlDialect() {
     super(DEFAULT_CONTEXT);
@@ -37,21 +40,31 @@ public void unparseCall(SqlWriter writer, SqlCall call, int leftPrec, int rightP
     // Replace Calcite-specific functions with their Spark SQL equivalents
     if (CALCITE_TO_SPARK_MAPPING.containsKey(operatorName)) {
       unparseFunction(
-          writer, call, CALCITE_TO_SPARK_MAPPING.get(operatorName), leftPrec, rightPrec);
+          writer,
+          call,
+          CALCITE_TO_SPARK_MAPPING.get(operatorName),
+          leftPrec,
+          rightPrec,
+          CALL_SEPARATOR.getOrDefault(operatorName, ","));
     } else {
       super.unparseCall(writer, call, leftPrec, rightPrec);
     }
   }
 
   private void unparseFunction(
-      SqlWriter writer, SqlCall call, String functionName, int leftPrec, int rightPrec) {
-    writer.keyword(functionName);
+      SqlWriter writer,
+      SqlCall call,
+      String functionName,
+      int leftPrec,
+      int rightPrec,
+      String separator) {
+    writer.print(functionName);
     final SqlWriter.Frame frame = writer.startList("(", ")");
     for (int i = 0; i < call.operandCount(); i++) {
       if (i > 0) {
-        writer.sep(",");
+        writer.sep(separator);
       }
-      call.operand(i).unparse(writer, leftPrec, rightPrec);
+      call.operand(i).unparse(writer, 0, rightPrec);
     }
     writer.endList(frame);
   }

From 772e4b60d509f010bad0e4e790ea969780ce4252 Mon Sep 17 00:00:00 2001
From: Yuanchun Shen
Date: Wed, 12 Nov 2025 18:42:21 +0800
Subject: [PATCH 44/99] Merge the implementation of `timechart` and `chart` (#4755)

* Remove visitTimechart

Signed-off-by: Yuanchun Shen

* Migrate per functions to Chart

Signed-off-by: Yuanchun Shen

* Update CalcitePPLTimechartTest

Signed-off-by: Yuanchun Shen

* Migrate TimechartTest to use Chart

Signed-off-by: Yuanchun Shen

* Fix AST relevant tests

Signed-off-by: Yuanchun Shen

* Remove Timechart AST object in favor of Chart

Signed-off-by: Yuanchun Shen

* Update expected plans for timechart

Signed-off-by: Yuanchun Shen

* Update doctest for timechart - add 2 more indices for test purposes

Signed-off-by: Yuanchun Shen

* Add yaml tests for 4581, 4582, and 4632

Signed-off-by: Yuanchun Shen

* Allow flexible parameter positions for chart and timechart

Signed-off-by: Yuanchun Shen

* Simplify CalciteTimechartCommandIT

Signed-off-by: Yuanchun Shen

---------

Signed-off-by: Yuanchun Shen
---
 .../org/opensearch/sql/analysis/Analyzer.java | 6 -
 .../sql/ast/AbstractNodeVisitor.java | 5 -
 .../org/opensearch/sql/ast/dsl/AstDSL.java | 2 +-
 .../org/opensearch/sql/ast/tree/Chart.java | 158 ++++-
 .../opensearch/sql/ast/tree/Timechart.java | 209 ---------
 .../sql/calcite/CalciteRelNodeVisitor.java | 395 ++----------------
 ...mechartTest.java => PerFunctionsTest.java} | 96 +++--
 docs/user/dql/metadata.rst | 52 +--
 docs/user/ppl/cmd/timechart.rst | 298 ++++++-------
 doctest/test_data/events_many_hosts.json | 22 +
 doctest/test_data/events_null.json | 12 +
 doctest/test_docs.py | 2 +
 .../calcite/remote/CalciteChartCommandIT.java | 11
+- .../remote/CalciteTimechartCommandIT.java | 210 ++-------- .../org/opensearch/sql/util/MatcherUtils.java | 6 + .../calcite/explain_timechart.yaml | 49 +-- .../calcite/explain_timechart_count.yaml | 91 ++-- .../explain_timechart.yaml | 47 ++- .../explain_timechart_count.yaml | 94 ++--- .../rest-api-spec/test/issues/4550.yml | 6 +- .../rest-api-spec/test/issues/4581.yml | 53 +++ .../rest-api-spec/test/issues/4582.yml | 52 +++ .../rest-api-spec/test/issues/4632.yml | 69 +++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 28 +- .../sql/ppl/parser/AstExpressionBuilder.java | 18 +- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 52 +-- .../ppl/calcite/CalcitePPLTimechartTest.java | 191 ++++----- .../sql/ppl/parser/AstBuilderTest.java | 61 ++- .../ppl/parser/AstExpressionBuilderTest.java | 215 ++++++---- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 2 +- 31 files changed, 1096 insertions(+), 1422 deletions(-) delete mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java rename core/src/test/java/org/opensearch/sql/ast/tree/{TimechartTest.java => PerFunctionsTest.java} (70%) create mode 100644 doctest/test_data/events_many_hosts.json create mode 100644 doctest/test_data/events_null.json create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4581.yml create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4632.yml diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index f20e95c0382..2caf6803a24 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -97,7 +97,6 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Timechart; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -776,11 +775,6 @@ public LogicalPlan visitChart(Chart node, AnalysisContext context) { throw getOnlyForCalciteException("Chart"); } - @Override - public LogicalPlan visitTimechart(Timechart node, AnalysisContext context) { - throw getOnlyForCalciteException("Timechart"); - } - @Override public LogicalPlan visitWindow(Window node, AnalysisContext context) { throw getOnlyForCalciteException("Window"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 2daa4b4a2f9..a8bbfc3a82b 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -84,7 +84,6 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Timechart; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -285,10 +284,6 @@ public T visitChart(Chart node, C context) { return visitChildren(node, context); } - public T visitTimechart(Timechart node, C context) { - return visitChildren(node, context); - } - public T visitRegex(Regex node, C context) { return 
visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 4e87502e73c..93ad06011c0 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -734,7 +734,7 @@ public static Bin bin(UnresolvedExpression field, Argument... arguments) { } /** Get a reference to the implicit timestamp field {@code @timestamp} */ - public static Field referImplicitTimestampField() { + public static Field implicitTimestampField() { return AstDSL.field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP); } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java index d0f982edce6..ada20cbde74 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java @@ -5,17 +5,44 @@ package org.opensearch.sql.ast.tree; +import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; +import static org.opensearch.sql.ast.dsl.AstDSL.doubleLiteral; +import static org.opensearch.sql.ast.dsl.AstDSL.eval; +import static org.opensearch.sql.ast.dsl.AstDSL.function; +import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; +import static org.opensearch.sql.ast.expression.IntervalUnit.MILLISECOND; +import static org.opensearch.sql.ast.tree.Chart.PerFunctionRateExprBuilder.timestampadd; +import static org.opensearch.sql.ast.tree.Chart.PerFunctionRateExprBuilder.timestampdiff; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPADD; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPDIFF; + import com.google.common.collect.ImmutableList; import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; +import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.ast.expression.AggregateFunction; +import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.IntervalUnit; +import org.opensearch.sql.ast.expression.Let; import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.Span; +import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.calcite.utils.PlanUtils; /** AST node represent chart command. 
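+ * As of the timechart/chart merge (#4755), the timechart command also lowers to this node,
+ * and per_* aggregate functions are rewritten into an eval-based rate calculation by
+ * transformPerFunction() when a child plan is attached.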
*/ @Getter @@ -39,8 +66,8 @@ public class Chart extends UnresolvedPlan { @Override public UnresolvedPlan attach(UnresolvedPlan child) { - this.child = child; - return this; + // Transform after child attached to avoid unintentionally overriding it + return toBuilder().child(child).build().transformPerFunction(); } @Override @@ -52,4 +79,131 @@ public List getChild() { public T accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitChart(this, context); } + + /** + * Transform per function to eval-based post-processing on sum result by chart. Specifically, + * calculate how many seconds are in the time bucket based on the span option dynamically, then + * divide the aggregated sum value by the number of seconds to get the per-second rate. + * + *
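+ * The bucket length is derived from the span itself: spanMillis =
+ * timestampdiff(MILLISECOND, @timestamp, timestampadd(unit, value, @timestamp)).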
For example, with span=5m per_second(field): per second rate = sum(field) / 300 seconds + * + * @return eval+chart if per function present, or the original chart otherwise. + */ + private UnresolvedPlan transformPerFunction() { + Optional perFuncOpt = PerFunction.from(aggregationFunction); + if (perFuncOpt.isEmpty()) { + return this; + } + + PerFunction perFunc = perFuncOpt.get(); + // For chart, the rowSplit should contain the span information + UnresolvedExpression spanExpr = rowSplit; + if (rowSplit instanceof Alias) { + spanExpr = ((Alias) rowSplit).getDelegated(); + } + if (!(spanExpr instanceof Span)) { + return this; // Cannot transform without span information + } + + Span span = (Span) spanExpr; + Field spanStartTime = AstDSL.implicitTimestampField(); + Function spanEndTime = timestampadd(span.getUnit(), span.getValue(), spanStartTime); + Function spanMillis = timestampdiff(MILLISECOND, spanStartTime, spanEndTime); + final int SECOND_IN_MILLISECOND = 1000; + return eval( + chart(AstDSL.alias(perFunc.aggName, PerFunctionRateExprBuilder.sum(perFunc.aggArg))), + let(perFunc.aggName) + .multiply(perFunc.seconds * SECOND_IN_MILLISECOND) + .dividedBy(spanMillis)); + } + + private Chart chart(UnresolvedExpression newAggregationFunction) { + return this.toBuilder().aggregationFunction(newAggregationFunction).build(); + } + + @RequiredArgsConstructor + static class PerFunction { + private static final Map UNIT_SECONDS = + Map.of( + "per_second", 1, + "per_minute", 60, + "per_hour", 3600, + "per_day", 86400); + private final String aggName; + private final UnresolvedExpression aggArg; + private final int seconds; + + static Optional from(UnresolvedExpression aggExpr) { + if (aggExpr instanceof Alias) { + return from(((Alias) aggExpr).getDelegated()); + } + ; + if (!(aggExpr instanceof AggregateFunction)) { + return Optional.empty(); + } + + AggregateFunction aggFunc = (AggregateFunction) aggExpr; + String aggFuncName = aggFunc.getFuncName().toLowerCase(Locale.ROOT); + if (!UNIT_SECONDS.containsKey(aggFuncName)) { + return Optional.empty(); + } + + String aggName = toAggName(aggFunc); + return Optional.of( + new PerFunction(aggName, aggFunc.getField(), UNIT_SECONDS.get(aggFuncName))); + } + + private static String toAggName(AggregateFunction aggFunc) { + String fieldName = + (aggFunc.getField() instanceof Field) + ? ((Field) aggFunc.getField()).getField().toString() + : aggFunc.getField().toString(); + return String.format(Locale.ROOT, "%s(%s)", aggFunc.getFuncName(), fieldName); + } + } + + private PerFunctionRateExprBuilder let(String fieldName) { + return new PerFunctionRateExprBuilder(AstDSL.field(fieldName)); + } + + /** Fluent builder for creating Let expressions with mathematical operations. 
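+ * Illustrative usage, mirroring transformPerFunction() above: for span=5m per_second(bytes),
+ * let("per_second(bytes)").multiply(1 * 1000).dividedBy(spanMillis) builds the expression
+ * per_second(bytes) = per_second(bytes) * 1000.0 / spanMillis, i.e. sum(bytes) / 300.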
*/ + static class PerFunctionRateExprBuilder { + private final Field field; + private UnresolvedExpression expr; + + PerFunctionRateExprBuilder(Field field) { + this.field = field; + this.expr = field; + } + + PerFunctionRateExprBuilder multiply(Integer multiplier) { + // Promote to double literal to avoid integer division in downstream + this.expr = + function( + MULTIPLY.getName().getFunctionName(), expr, doubleLiteral(multiplier.doubleValue())); + return this; + } + + Let dividedBy(UnresolvedExpression divisor) { + return AstDSL.let(field, function(DIVIDE.getName().getFunctionName(), expr, divisor)); + } + + static UnresolvedExpression sum(UnresolvedExpression field) { + return aggregate(SUM.getName().getFunctionName(), field); + } + + static Function timestampadd( + SpanUnit unit, UnresolvedExpression value, UnresolvedExpression timestampField) { + UnresolvedExpression intervalUnit = + stringLiteral(PlanUtils.spanUnitToIntervalUnit(unit).toString()); + return function( + TIMESTAMPADD.getName().getFunctionName(), intervalUnit, value, timestampField); + } + + static Function timestampdiff( + IntervalUnit unit, UnresolvedExpression start, UnresolvedExpression end) { + return function( + TIMESTAMPDIFF.getName().getFunctionName(), stringLiteral(unit.toString()), start, end); + } + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java deleted file mode 100644 index 19972358721..00000000000 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Timechart.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.ast.tree; - -import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; -import static org.opensearch.sql.ast.dsl.AstDSL.doubleLiteral; -import static org.opensearch.sql.ast.dsl.AstDSL.eval; -import static org.opensearch.sql.ast.dsl.AstDSL.function; -import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; -import static org.opensearch.sql.ast.expression.IntervalUnit.MILLISECOND; -import static org.opensearch.sql.ast.tree.Timechart.PerFunctionRateExprBuilder.sum; -import static org.opensearch.sql.ast.tree.Timechart.PerFunctionRateExprBuilder.timestampadd; -import static org.opensearch.sql.ast.tree.Timechart.PerFunctionRateExprBuilder.timestampdiff; -import static org.opensearch.sql.calcite.plan.OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUM; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPADD; -import static org.opensearch.sql.expression.function.BuiltinFunctionName.TIMESTAMPDIFF; - -import com.google.common.collect.ImmutableList; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Optional; -import lombok.AllArgsConstructor; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; -import org.opensearch.sql.ast.AbstractNodeVisitor; -import org.opensearch.sql.ast.dsl.AstDSL; -import org.opensearch.sql.ast.expression.AggregateFunction; -import org.opensearch.sql.ast.expression.Field; -import org.opensearch.sql.ast.expression.Function; -import org.opensearch.sql.ast.expression.IntervalUnit; -import 
org.opensearch.sql.ast.expression.Let; -import org.opensearch.sql.ast.expression.Span; -import org.opensearch.sql.ast.expression.SpanUnit; -import org.opensearch.sql.ast.expression.UnresolvedExpression; -import org.opensearch.sql.calcite.utils.PlanUtils; - -/** AST node represent Timechart operation. */ -@Getter -@ToString -@EqualsAndHashCode(callSuper = false) -@AllArgsConstructor -@lombok.Builder(toBuilder = true) -public class Timechart extends UnresolvedPlan { - private UnresolvedPlan child; - private UnresolvedExpression binExpression; - private UnresolvedExpression aggregateFunction; - private UnresolvedExpression byField; - private Integer limit; - private Boolean useOther; - - public Timechart(UnresolvedPlan child, UnresolvedExpression aggregateFunction) { - this(child, null, aggregateFunction, null, null, true); - } - - public Timechart span(UnresolvedExpression binExpression) { - return toBuilder().binExpression(binExpression).build(); - } - - public Timechart by(UnresolvedExpression byField) { - return toBuilder().byField(byField).build(); - } - - public Timechart limit(Integer limit) { - return toBuilder().limit(limit).build(); - } - - public Timechart useOther(Boolean useOther) { - return toBuilder().useOther(useOther).build(); - } - - @Override - public UnresolvedPlan attach(UnresolvedPlan child) { - // Transform after child attached to avoid unintentionally overriding it - return toBuilder().child(child).build().transformPerFunction(); - } - - @Override - public List getChild() { - return ImmutableList.of(child); - } - - @Override - public T accept(AbstractNodeVisitor nodeVisitor, C context) { - return nodeVisitor.visitTimechart(this, context); - } - - /** - * Transform per function to eval-based post-processing on sum result by timechart. Specifically, - * calculate how many seconds are in the time bucket based on the span option dynamically, then - * divide the aggregated sum value by the number of seconds to get the per-second rate. - * - *
For example, with span=5m per_second(field): per second rate = sum(field) / 300 seconds - * - * @return eval+timechart if per function present, or the original timechart otherwise. - */ - private UnresolvedPlan transformPerFunction() { - Optional perFuncOpt = PerFunction.from(aggregateFunction); - if (perFuncOpt.isEmpty()) { - return this; - } - - PerFunction perFunc = perFuncOpt.get(); - Span span = (Span) this.binExpression; - Field spanStartTime = AstDSL.field(IMPLICIT_FIELD_TIMESTAMP); - Function spanEndTime = timestampadd(span.getUnit(), span.getValue(), spanStartTime); - Function spanMillis = timestampdiff(MILLISECOND, spanStartTime, spanEndTime); - final int SECOND_IN_MILLISECOND = 1000; - return eval( - timechart(AstDSL.alias(perFunc.aggName, sum(perFunc.aggArg))), - let(perFunc.aggName) - .multiply(perFunc.seconds * SECOND_IN_MILLISECOND) - .dividedBy(spanMillis)); - } - - private Timechart timechart(UnresolvedExpression newAggregateFunction) { - return this.toBuilder().aggregateFunction(newAggregateFunction).build(); - } - - @RequiredArgsConstructor - static class PerFunction { - private static final Map UNIT_SECONDS = - Map.of( - "per_second", 1, - "per_minute", 60, - "per_hour", 3600, - "per_day", 86400); - private final String aggName; - private final UnresolvedExpression aggArg; - private final int seconds; - - static Optional from(UnresolvedExpression aggExpr) { - if (!(aggExpr instanceof AggregateFunction)) { - return Optional.empty(); - } - - AggregateFunction aggFunc = (AggregateFunction) aggExpr; - String aggFuncName = aggFunc.getFuncName().toLowerCase(Locale.ROOT); - if (!UNIT_SECONDS.containsKey(aggFuncName)) { - return Optional.empty(); - } - - String aggName = toAggName(aggFunc); - return Optional.of( - new PerFunction(aggName, aggFunc.getField(), UNIT_SECONDS.get(aggFuncName))); - } - - private static String toAggName(AggregateFunction aggFunc) { - String fieldName = - (aggFunc.getField() instanceof Field) - ? ((Field) aggFunc.getField()).getField().toString() - : aggFunc.getField().toString(); - return String.format(Locale.ROOT, "%s(%s)", aggFunc.getFuncName(), fieldName); - } - } - - private PerFunctionRateExprBuilder let(String fieldName) { - return new PerFunctionRateExprBuilder(AstDSL.field(fieldName)); - } - - /** Fluent builder for creating Let expressions with mathematical operations. 
*/ - static class PerFunctionRateExprBuilder { - private final Field field; - private UnresolvedExpression expr; - - PerFunctionRateExprBuilder(Field field) { - this.field = field; - this.expr = field; - } - - PerFunctionRateExprBuilder multiply(Integer multiplier) { - // Promote to double literal to avoid integer division in downstream - this.expr = - function( - MULTIPLY.getName().getFunctionName(), expr, doubleLiteral(multiplier.doubleValue())); - return this; - } - - Let dividedBy(UnresolvedExpression divisor) { - return AstDSL.let(field, function(DIVIDE.getName().getFunctionName(), expr, divisor)); - } - - static UnresolvedExpression sum(UnresolvedExpression field) { - return aggregate(SUM.getName().getFunctionName(), field); - } - - static Function timestampadd( - SpanUnit unit, UnresolvedExpression value, UnresolvedExpression timestampField) { - UnresolvedExpression intervalUnit = - stringLiteral(PlanUtils.spanUnitToIntervalUnit(unit).toString()); - return function( - TIMESTAMPADD.getName().getFunctionName(), intervalUnit, value, timestampField); - } - - static Function timestampdiff( - IntervalUnit unit, UnresolvedExpression start, UnresolvedExpression end) { - return function( - TIMESTAMPDIFF.getName().getFunctionName(), stringLiteral(unit.toString()), start, end); - } - } -} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e2a0d7cd12d..2b3b1fd88e7 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -31,7 +31,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; -import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; @@ -2526,17 +2525,13 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.field(0), relBuilder.alias(columnSplitExpr, columnSplitName), relBuilder.field(2)); - String aggFunctionName = getAggFunctionName(node.getAggregationFunction()); - BuiltinFunctionName aggFunction = - BuiltinFunctionName.of(aggFunctionName) - .orElseThrow( - () -> - new IllegalArgumentException( - StringUtils.format( - "Unrecognized aggregation function: %s", aggFunctionName))); relBuilder.aggregate( relBuilder.groupKey(relBuilder.field(0), relBuilder.field(1)), - buildAggCall(context.relBuilder, aggFunction, relBuilder.field(2)).as(aggFieldName)); + buildAggCall( + context.relBuilder, + getAggFunctionName(node.getAggregationFunction()), + relBuilder.field(2)) + .as(aggFieldName)); // The output of chart is expected to be ordered by row and column split names relBuilder.sort(relBuilder.field(0), relBuilder.field(1)); return relBuilder.peek(); @@ -2578,6 +2573,37 @@ private RelNode rankByColumnSplit( return relBuilder.build(); } + /** + * Aggregate a field based on a given built-in aggregation function name. + * + *
It is intended for secondary aggregations in timechart and chart commands. Using it + * elsewhere may lead to unintended results. It handles explicitly only MIN, MAX, AVG, COUNT, + * DISTINCT_COUNT, EARLIEST, and LATEST. It sums the results for the rest aggregation types, + * assuming them to be accumulative. + */ + private AggCall buildAggCall(RelBuilder relBuilder, String aggFunctionName, RexNode node) { + BuiltinFunctionName aggFunction = + BuiltinFunctionName.of(aggFunctionName) + .orElseThrow( + () -> + new IllegalArgumentException( + StringUtils.format( + "Unrecognized aggregation function: %s", aggFunctionName))); + return switch (aggFunction) { + case MIN, EARLIEST -> relBuilder.min(node); + case MAX, LATEST -> relBuilder.max(node); + case AVG -> relBuilder.avg(node); + default -> relBuilder.sum(node); + }; + } + + private String getAggFunctionName(UnresolvedExpression aggregateFunction) { + if (aggregateFunction instanceof Alias alias) { + return getAggFunctionName(alias.getDelegated()); + } + return ((AggregateFunction) aggregateFunction).getFuncName(); + } + @AllArgsConstructor private static class ChartConfig { private final int limit; @@ -2600,355 +2626,6 @@ static ChartConfig fromArguments(ArgumentMap argMap) { } } - /** Transforms timechart command into SQL-based operations. */ - @Override - public RelNode visitTimechart( - org.opensearch.sql.ast.tree.Timechart node, CalcitePlanContext context) { - visitChildren(node, context); - - // Extract parameters - UnresolvedExpression spanExpr = node.getBinExpression(); - - List groupExprList; - - // Handle no by field case - if (node.getByField() == null) { - String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); - - // Create group expression list with just the timestamp span but use a different alias - // to avoid @timestamp naming conflict - List simpleGroupExprList = new ArrayList<>(); - simpleGroupExprList.add(new Alias("timestamp", spanExpr)); - // Create agg expression list with the aggregate function - List simpleAggExprList = - List.of(new Alias(aggFieldAlias, node.getAggregateFunction())); - // Create an Aggregation object - Aggregation aggregation = - new Aggregation( - simpleAggExprList, - Collections.emptyList(), - simpleGroupExprList, - null, - Collections.emptyList()); - // Use visitAggregation to handle the aggregation and column naming - RelNode result = visitAggregation(aggregation, context); - // Push the result and add explicit projection to get [@timestamp, count] order - context.relBuilder.push(result); - // Reorder fields: timestamp first, then count - context.relBuilder.project( - context.relBuilder.field("timestamp"), context.relBuilder.field(aggFieldAlias)); - // Rename timestamp to @timestamp - context.relBuilder.rename(List.of("@timestamp", aggFieldAlias)); - - context.relBuilder.sort(context.relBuilder.field(0)); - return context.relBuilder.peek(); - } - - // Extract parameters for byField case - UnresolvedExpression byField = node.getByField(); - String byFieldName = ((Field) byField).getField().toString(); - String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); - - int limit = Optional.ofNullable(node.getLimit()).orElse(10); - boolean useOther = Optional.ofNullable(node.getUseOther()).orElse(true); - - try { - // Step 1: Initial aggregation - IMPORTANT: order is [spanExpr, byField] - groupExprList = Arrays.asList(spanExpr, byField); - aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context, false); - - // First rename the timestamp field (2nd to 
last) to @timestamp - List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); - List renamedFields = new ArrayList<>(fieldNames); - // TODO: Fix aggregateWithTrimming reordering - renamedFields.set(fieldNames.size() - 2, "@timestamp"); - context.relBuilder.rename(renamedFields); - - // Then reorder: @timestamp first, then byField, then value function - List outputFields = context.relBuilder.fields(); - List reordered = new ArrayList<>(); - reordered.add(context.relBuilder.field("@timestamp")); // timestamp first - reordered.add(context.relBuilder.field(byFieldName)); // byField second - reordered.add(outputFields.get(outputFields.size() - 1)); // value function last - context.relBuilder.project(reordered); - - // Handle no limit case - just sort and return with proper field aliases - if (limit == 0) { - // Add final projection with proper aliases: [@timestamp, byField, aggFieldAlias] - context.relBuilder.project( - context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), - context.relBuilder.alias(context.relBuilder.field(1), byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); - context.relBuilder.sort(context.relBuilder.field(0), context.relBuilder.field(1)); - return context.relBuilder.peek(); - } - - // Use known field positions after reordering: 0=@timestamp, 1=byField, 2=value - RelNode completeResults = context.relBuilder.build(); - - // Step 2: Find top N categories using window function approach (more efficient than separate - // aggregation) - String aggFunctionName = getAggFunctionName(node.getAggregateFunction()); - Optional aggFuncNameOptional = BuiltinFunctionName.of(aggFunctionName); - if (aggFuncNameOptional.isEmpty()) { - throw new IllegalArgumentException( - StringUtils.format("Unrecognized aggregation function: %s", aggFunctionName)); - } - BuiltinFunctionName aggFunction = aggFuncNameOptional.get(); - RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, aggFunction, context); - - // Step 3: Apply OTHER logic with single pass - return buildFinalResultWithOther( - completeResults, - topCategories, - byFieldName, - aggFunction, - aggFieldAlias, - useOther, - limit, - context); - - } catch (Exception e) { - throw new RuntimeException("Error in visitTimechart: " + e.getMessage(), e); - } - } - - private String getAggFunctionName(UnresolvedExpression aggregateFunction) { - if (aggregateFunction instanceof Alias alias) { - return getAggFunctionName(alias.getDelegated()); - } - return ((AggregateFunction) aggregateFunction).getFuncName(); - } - - /** Build top categories query - simpler approach that works better with OTHER handling */ - private RelNode buildTopCategoriesQuery( - RelNode completeResults, - int limit, - BuiltinFunctionName aggFunction, - CalcitePlanContext context) { - context.relBuilder.push(completeResults); - - // Filter out null values when determining top categories - null should not count towards limit - context.relBuilder.filter(context.relBuilder.isNotNull(context.relBuilder.field(1))); - - // Get totals for non-null categories - field positions: 0=@timestamp, 1=byField, 2=value - RexInputRef valueField = context.relBuilder.field(2); - AggCall call = buildAggCall(context.relBuilder, aggFunction, valueField); - - context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(1)), call.as("grand_total")); - - // Apply sorting and limit to non-null categories only - RexNode sortField = context.relBuilder.field("grand_total"); - // For MIN and EARLIEST, 
top results should be the minimum ones - sortField = - aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST - ? sortField - : context.relBuilder.desc(sortField); - context.relBuilder.sort(sortField); - if (limit > 0) { - context.relBuilder.limit(0, limit); - } - return context.relBuilder.build(); - } - - /** Build final result with OTHER category using efficient single-pass approach */ - private RelNode buildFinalResultWithOther( - RelNode completeResults, - RelNode topCategories, - String byFieldName, - BuiltinFunctionName aggFunction, - String aggFieldAlias, - boolean useOther, - int limit, - CalcitePlanContext context) { - - // Use zero-filling for count aggregations, standard result for others - if (aggFieldAlias.equals("count")) { - return buildZeroFilledResult( - completeResults, topCategories, byFieldName, aggFieldAlias, useOther, limit, context); - } else { - return buildStandardResult( - completeResults, - topCategories, - byFieldName, - aggFunction, - aggFieldAlias, - useOther, - context); - } - } - - /** Build standard result without zero-filling */ - private RelNode buildStandardResult( - RelNode completeResults, - RelNode topCategories, - String byFieldName, - BuiltinFunctionName aggFunctionName, - String aggFieldAlias, - boolean useOther, - CalcitePlanContext context) { - - context.relBuilder.push(completeResults); - context.relBuilder.push(topCategories); - - // LEFT JOIN to identify top categories - field positions: 0=@timestamp, 1=byField, 2=value - context.relBuilder.join( - org.apache.calcite.rel.core.JoinRelType.LEFT, - context.relBuilder.equals( - context.relBuilder.field(2, 0, 1), context.relBuilder.field(2, 1, 0))); - - // Calculate field position after join - int topCategoryFieldIndex = completeResults.getRowType().getFieldCount(); - - // Create CASE expression for OTHER logic - RexNode categoryExpr = createOtherCaseExpression(topCategoryFieldIndex, 1, context); - - // Project and aggregate - context.relBuilder.project( - context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), - context.relBuilder.alias(categoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); - - RexInputRef valueField = context.relBuilder.field(2); - AggCall aggCall = buildAggCall(context.relBuilder, aggFunctionName, valueField); - context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - aggCall.as(aggFieldAlias)); - - applyFiltersAndSort(useOther, context); - return context.relBuilder.peek(); - } - - /** Helper to create OTHER case expression - preserves NULL as a category */ - private RexNode createOtherCaseExpression( - int topCategoryFieldIndex, int byIndex, CalcitePlanContext context) { - return context.relBuilder.call( - org.apache.calcite.sql.fun.SqlStdOperatorTable.CASE, - context.relBuilder.isNotNull(context.relBuilder.field(topCategoryFieldIndex)), - context.relBuilder.field(byIndex), // Keep original value (including NULL) - context.relBuilder.call( - org.apache.calcite.sql.fun.SqlStdOperatorTable.CASE, - context.relBuilder.isNull(context.relBuilder.field(byIndex)), - context.relBuilder.literal(null), // Preserve NULL as NULL - context.relBuilder.literal("OTHER"))); - } - - /** Helper to apply filters and sorting */ - private void applyFiltersAndSort(boolean useOther, CalcitePlanContext context) { - if (!useOther) { - context.relBuilder.filter( - context.relBuilder.notEquals( - context.relBuilder.field(1), 
context.relBuilder.literal("OTHER"))); - } - context.relBuilder.sort(context.relBuilder.field(0), context.relBuilder.field(1)); - } - - /** Build zero-filled result using fillnull pattern - treat NULL as just another category */ - private RelNode buildZeroFilledResult( - RelNode completeResults, - RelNode topCategories, - String byFieldName, - String aggFieldAlias, - boolean useOther, - int limit, - CalcitePlanContext context) { - - // Get all unique timestamps - field positions: 0=@timestamp, 1=byField, 2=value - context.relBuilder.push(completeResults); - context.relBuilder.aggregate(context.relBuilder.groupKey(context.relBuilder.field(0))); - RelNode allTimestamps = context.relBuilder.build(); - - // Get all categories for zero-filling - apply OTHER logic here too - context.relBuilder.push(completeResults); - context.relBuilder.push(topCategories); - context.relBuilder.join( - org.apache.calcite.rel.core.JoinRelType.LEFT, - context.relBuilder.call( - org.apache.calcite.sql.fun.SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, - context.relBuilder.field(2, 0, 1), - context.relBuilder.field(2, 1, 0))); - - int topCategoryFieldIndex = completeResults.getRowType().getFieldCount(); - RexNode categoryExpr = createOtherCaseExpression(topCategoryFieldIndex, 1, context); - - context.relBuilder.project(categoryExpr); - context.relBuilder.aggregate(context.relBuilder.groupKey(context.relBuilder.field(0))); - RelNode allCategories = context.relBuilder.build(); - - // Cross join timestamps with ALL categories (including OTHER) for zero-filling - context.relBuilder.push(allTimestamps); - context.relBuilder.push(allCategories); - context.relBuilder.join( - org.apache.calcite.rel.core.JoinRelType.INNER, context.relBuilder.literal(true)); - - // Create zero-filled combinations with count=0 - context.relBuilder.project( - context.relBuilder.alias( - context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), - "@timestamp"), - context.relBuilder.alias(context.relBuilder.field(1), byFieldName), - context.relBuilder.alias(context.relBuilder.literal(0), aggFieldAlias)); - RelNode zeroFilledCombinations = context.relBuilder.build(); - - // Get actual results with OTHER logic applied - context.relBuilder.push(completeResults); - context.relBuilder.push(topCategories); - context.relBuilder.join( - org.apache.calcite.rel.core.JoinRelType.LEFT, - // Use IS NOT DISTINCT FROM for proper null handling in join - context.relBuilder.call( - org.apache.calcite.sql.fun.SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, - context.relBuilder.field(2, 0, 1), - context.relBuilder.field(2, 1, 0))); - - int actualTopCategoryFieldIndex = completeResults.getRowType().getFieldCount(); - RexNode actualCategoryExpr = createOtherCaseExpression(actualTopCategoryFieldIndex, 1, context); - - context.relBuilder.project( - context.relBuilder.alias( - context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), - "@timestamp"), - context.relBuilder.alias(actualCategoryExpr, byFieldName), - context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); - - context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as("actual_count")); - RelNode actualResults = context.relBuilder.build(); - - // UNION zero-filled with actual results - context.relBuilder.push(actualResults); - context.relBuilder.push(zeroFilledCombinations); - context.relBuilder.union(false); - - // Aggregate to combine actual and 
zero-filled data - context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), - context.relBuilder.sum(context.relBuilder.field(2)).as(aggFieldAlias)); - - applyFiltersAndSort(useOther, context); - return context.relBuilder.peek(); - } - - /** - * Aggregate a field based on a given built-in aggregation function name. - * - *
It is intended for secondary aggregations in timechart and chart commands. Using it - * elsewhere may lead to unintended results. It handles explicitly only MIN, MAX, AVG, COUNT, - * DISTINCT_COUNT, EARLIEST, and LATEST. It sums the results for the rest aggregation types, - * assuming them to be accumulative. - */ - private AggCall buildAggCall( - RelBuilder relBuilder, BuiltinFunctionName aggFunction, RexNode node) { - return switch (aggFunction) { - case MIN, EARLIEST -> relBuilder.min(node); - case MAX, LATEST -> relBuilder.max(node); - case AVG -> relBuilder.avg(node); - default -> relBuilder.sum(node); - }; - } - @Override public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/core/src/test/java/org/opensearch/sql/ast/tree/TimechartTest.java b/core/src/test/java/org/opensearch/sql/ast/tree/PerFunctionsTest.java similarity index 70% rename from core/src/test/java/org/opensearch/sql/ast/tree/TimechartTest.java rename to core/src/test/java/org/opensearch/sql/ast/tree/PerFunctionsTest.java index d587ff71787..2fa3bc2cd24 100644 --- a/core/src/test/java/org/opensearch/sql/ast/tree/TimechartTest.java +++ b/core/src/test/java/org/opensearch/sql/ast/tree/PerFunctionsTest.java @@ -13,7 +13,9 @@ import static org.opensearch.sql.ast.dsl.AstDSL.function; import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.relation; +import static org.opensearch.sql.calcite.plan.OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP; +import java.util.List; import java.util.stream.Stream; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -21,13 +23,15 @@ import org.junit.jupiter.params.provider.MethodSource; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.AggregateFunction; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.Let; +import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.Span; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.UnresolvedExpression; +import org.opensearch.sql.expression.function.BuiltinFunctionName; -class TimechartTest { - +class PerFunctionsTest { /** * @return test sources for per_* function test. 
*/ @@ -57,8 +61,9 @@ void should_transform_per_second_for_different_spans( multiply("per_second(bytes)", 1000.0), timestampdiff( "MILLISECOND", - "@timestamp", - timestampadd(expectedIntervalUnit, spanValue, "@timestamp")))), + IMPLICIT_FIELD_TIMESTAMP, + timestampadd( + expectedIntervalUnit, spanValue, IMPLICIT_FIELD_TIMESTAMP)))), timechart(span(spanValue, spanUnit), alias("per_second(bytes)", sum("bytes"))))); } @@ -76,8 +81,9 @@ void should_transform_per_minute_for_different_spans( multiply("per_minute(bytes)", 60000.0), timestampdiff( "MILLISECOND", - "@timestamp", - timestampadd(expectedIntervalUnit, spanValue, "@timestamp")))), + IMPLICIT_FIELD_TIMESTAMP, + timestampadd( + expectedIntervalUnit, spanValue, IMPLICIT_FIELD_TIMESTAMP)))), timechart(span(spanValue, spanUnit), alias("per_minute(bytes)", sum("bytes"))))); } @@ -95,8 +101,9 @@ void should_transform_per_hour_for_different_spans( multiply("per_hour(bytes)", 3600000.0), timestampdiff( "MILLISECOND", - "@timestamp", - timestampadd(expectedIntervalUnit, spanValue, "@timestamp")))), + IMPLICIT_FIELD_TIMESTAMP, + timestampadd( + expectedIntervalUnit, spanValue, IMPLICIT_FIELD_TIMESTAMP)))), timechart(span(spanValue, spanUnit), alias("per_hour(bytes)", sum("bytes"))))); } @@ -114,8 +121,9 @@ void should_transform_per_day_for_different_spans( multiply("per_day(bytes)", 8.64E7), timestampdiff( "MILLISECOND", - "@timestamp", - timestampadd(expectedIntervalUnit, spanValue, "@timestamp")))), + IMPLICIT_FIELD_TIMESTAMP, + timestampadd( + expectedIntervalUnit, spanValue, IMPLICIT_FIELD_TIMESTAMP)))), timechart(span(spanValue, spanUnit), alias("per_day(bytes)", sum("bytes"))))); } @@ -128,19 +136,27 @@ void should_not_transform_non_per_functions() { @Test void should_preserve_all_fields_during_per_function_transformation() { - Timechart original = - new Timechart(relation("logs"), perSecond("bytes")) - .span(span(5, "m")) - .by(field("status")) - .limit(20) - .useOther(false); - - Timechart expected = - new Timechart(relation("logs"), alias("per_second(bytes)", sum("bytes"))) - .span(span(5, "m")) - .by(field("status")) - .limit(20) - .useOther(false); + Chart original = + Chart.builder() + .child(relation("logs")) + .aggregationFunction(perSecond("bytes")) + .rowSplit(span(5, "m")) + .columnSplit(field("status")) + .arguments( + List.of( + new Argument("limit", intLiteral(20)), new Argument("useOther", Literal.FALSE))) + .build(); + + Chart expected = + Chart.builder() + .child(relation("logs")) + .aggregationFunction(alias("per_second(bytes)", sum("bytes"))) + .rowSplit(span(5, "m")) + .columnSplit(field("status")) + .arguments( + List.of( + new Argument("limit", intLiteral(20)), new Argument("useOther", Literal.FALSE))) + .build(); withTimechart(original) .whenTransformingPerFunction() @@ -151,7 +167,9 @@ void should_preserve_all_fields_during_per_function_transformation() { divide( multiply("per_second(bytes)", 1000.0), timestampdiff( - "MILLISECOND", "@timestamp", timestampadd("MINUTE", 5, "@timestamp")))), + "MILLISECOND", + IMPLICIT_FIELD_TIMESTAMP, + timestampadd("MINUTE", 5, IMPLICIT_FIELD_TIMESTAMP)))), expected)); } @@ -161,17 +179,21 @@ private static TransformationAssertion withTimechart(Span spanExpr, AggregateFun return new TransformationAssertion(timechart(spanExpr, aggFunc)); } - private static TransformationAssertion withTimechart(Timechart timechart) { + private static TransformationAssertion withTimechart(Chart timechart) { return new TransformationAssertion(timechart); } - private static Timechart timechart(Span 
spanExpr, UnresolvedExpression aggExpr) { + private static Chart timechart(Span spanExpr, UnresolvedExpression aggExpr) { // Set child here because expected object won't call attach below - return new Timechart(relation("t"), aggExpr).span(spanExpr).limit(10).useOther(true); + return Chart.builder() + .child(relation("t")) + .aggregationFunction(aggExpr) + .rowSplit(spanExpr) + .build(); } private static Span span(int value, String unit) { - return AstDSL.span(field("@timestamp"), intLiteral(value), SpanUnit.of(unit)); + return AstDSL.span(AstDSL.implicitTimestampField(), intLiteral(value), SpanUnit.of(unit)); } private static AggregateFunction perSecond(String fieldName) { @@ -209,23 +231,31 @@ private static UnresolvedExpression divide( private static UnresolvedExpression timestampadd(String unit, int value, String timestampField) { return function( - "timestampadd", AstDSL.stringLiteral(unit), intLiteral(value), field(timestampField)); + BuiltinFunctionName.TIMESTAMPADD.getName().getFunctionName(), + AstDSL.stringLiteral(unit), + intLiteral(value), + field(timestampField)); } private static UnresolvedExpression timestampdiff( String unit, String startField, UnresolvedExpression end) { - return function("timestampdiff", AstDSL.stringLiteral(unit), field(startField), end); + + return function( + BuiltinFunctionName.TIMESTAMPDIFF.getName().getFunctionName(), + AstDSL.stringLiteral(unit), + field(startField), + end); } - private static UnresolvedPlan eval(Let letExpr, Timechart timechartExpr) { + private static UnresolvedPlan eval(Let letExpr, Chart timechartExpr) { return AstDSL.eval(timechartExpr, letExpr); } private static class TransformationAssertion { - private final Timechart timechart; + private final Chart timechart; private UnresolvedPlan result; - TransformationAssertion(Timechart timechart) { + TransformationAssertion(Chart timechart) { this.timechart = timechart; } diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 3b277cd978f..7584c72505e 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,31 +35,33 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 20/20 - +----------------+-------------+------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ - | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | - |----------------+-------------+------------------+------------+---------+----------+------------+-----------+---------------------------+----------------| - | docTestCluster | null | .ql-datasources | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | account2 | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | events | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | 
occupation | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | otellogs | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | state_country | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | time_data | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | time_data2 | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | time_test | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | weblogs | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | wildcard | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | work_information | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | worker | BASE TABLE | null | null | null | null | null | null | - +----------------+-------------+------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ + fetched rows / total rows = 22/22 + +----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ + | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | + |----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------| + | docTestCluster | null | .ql-datasources | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | account2 | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | events | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | events_many_hosts | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | events_null | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | occupation | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | otellogs | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | state_country | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | time_data | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | time_data2 | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | time_test | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | weblogs | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | wildcard | BASE TABLE | null | null | null 
| null | null | null | + | docTestCluster | null | work_information | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | worker | BASE TABLE | null | null | null | null | null | null | + +----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ Example 2: Show Specific Index Information ------------------------------------------ diff --git a/docs/user/ppl/cmd/timechart.rst b/docs/user/ppl/cmd/timechart.rst index 512fa76370c..6aea1270f1e 100644 --- a/docs/user/ppl/cmd/timechart.rst +++ b/docs/user/ppl/cmd/timechart.rst @@ -132,109 +132,94 @@ PPL query:: os> source=events | timechart span=1h count() by host fetched rows / total rows = 2/2 - +---------------------+---------+-------+ - | @timestamp | host | count | - |---------------------+---------+-------| - | 2023-01-01 10:00:00 | server1 | 4 | - | 2023-01-01 10:00:00 | server2 | 4 | - +---------------------+---------+-------+ - -Example 2: Count events by minute with zero-filled results + +---------------------+---------+---------+ + | @timestamp | host | count() | + |---------------------+---------+---------| + | 2023-01-01 10:00:00 | server1 | 4 | + | 2023-01-01 10:00:00 | server2 | 4 | + +---------------------+---------+---------+ + +Example 2: Count events by minute ========================================================== -This example counts events for each minute and groups them by host, showing zero values for time-host combinations with no data. +This example counts events for each minute and groups them by host. PPL query:: os> source=events | timechart span=1m count() by host - fetched rows / total rows = 16/16 - +---------------------+---------+-------+ - | @timestamp | host | count | - |---------------------+---------+-------| - | 2023-01-01 10:00:00 | server1 | 1 | - | 2023-01-01 10:00:00 | server2 | 0 | - | 2023-01-01 10:05:00 | server1 | 0 | - | 2023-01-01 10:05:00 | server2 | 1 | - | 2023-01-01 10:10:00 | server1 | 1 | - | 2023-01-01 10:10:00 | server2 | 0 | - | 2023-01-01 10:15:00 | server1 | 0 | - | 2023-01-01 10:15:00 | server2 | 1 | - | 2023-01-01 10:20:00 | server1 | 1 | - | 2023-01-01 10:20:00 | server2 | 0 | - | 2023-01-01 10:25:00 | server1 | 0 | - | 2023-01-01 10:25:00 | server2 | 1 | - | 2023-01-01 10:30:00 | server1 | 1 | - | 2023-01-01 10:30:00 | server2 | 0 | - | 2023-01-01 10:35:00 | server1 | 0 | - | 2023-01-01 10:35:00 | server2 | 1 | - +---------------------+---------+-------+ - -Example 3: Calculate average CPU usage by minute + fetched rows / total rows = 8/8 + +---------------------+---------+---------+ + | @timestamp | host | count() | + |---------------------+---------+---------| + | 2023-01-01 10:00:00 | server1 | 1 | + | 2023-01-01 10:05:00 | server2 | 1 | + | 2023-01-01 10:10:00 | server1 | 1 | + | 2023-01-01 10:15:00 | server2 | 1 | + | 2023-01-01 10:20:00 | server1 | 1 | + | 2023-01-01 10:25:00 | server2 | 1 | + | 2023-01-01 10:30:00 | server1 | 1 | + | 2023-01-01 10:35:00 | server2 | 1 | + +---------------------+---------+---------+ + +Example 3: Calculate average number of packets by minute ================================================ -This example calculates the average CPU usage for each minute without grouping by any field. +This example calculates the average packets for each minute without grouping by any field. 
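+Note that the result column is named after the aggregation expression, for example ``avg(packets)`` or ``count()``.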
PPL query:: - PPL> source=events | timechart span=1m avg(cpu_usage) - fetched rows / total rows = 5/5 - +---------------------+------------------+ - | @timestamp | avg(cpu_usage) | - +---------------------+------------------+ - | 2024-07-01 00:00:00 | 45.2 | - | 2024-07-01 00:01:00 | 38.7 | - | 2024-07-01 00:02:00 | 55.3 | - | 2024-07-01 00:03:00 | 42.1 | - | 2024-07-01 00:04:00 | 41.8 | - +---------------------+------------------+ - -Example 4: Calculate average CPU usage by second and region + os> source=events | timechart span=1m avg(packets) + fetched rows / total rows = 8/8 + +---------------------+--------------+ + | @timestamp | avg(packets) | + |---------------------+--------------| + | 2023-01-01 10:00:00 | 60.0 | + | 2023-01-01 10:05:00 | 30.0 | + | 2023-01-01 10:10:00 | 60.0 | + | 2023-01-01 10:15:00 | 30.0 | + | 2023-01-01 10:20:00 | 60.0 | + | 2023-01-01 10:25:00 | 30.0 | + | 2023-01-01 10:30:00 | 180.0 | + | 2023-01-01 10:35:00 | 90.0 | + +---------------------+--------------+ + +Example 4: Calculate average number of packets by every 20 minutes and status =========================================================== -This example calculates the average CPU usage for each second and groups them by region. +This example calculates the average number of packets for every 20 minutes and groups them by status. PPL query:: - PPL> source=events | timechart span=1s avg(cpu_usage) by region - fetched rows / total rows = 5/5 - +---------------------+---------+------------------+ - | @timestamp | region | avg(cpu_usage) | - +---------------------+---------+------------------+ - | 2024-07-01 00:00:00 | us-east | 45.2 | - | 2024-07-01 00:01:00 | us-west | 38.7 | - | 2024-07-01 00:02:00 | us-east | 55.3 | - | 2024-07-01 00:03:00 | eu-west | 42.1 | - | 2024-07-01 00:04:00 | us-west | 41.8 | - +---------------------+---------+------------------+ - -Example 5: Count events by second and region with zero-filled results + os> source=events | timechart span=20m avg(packets) by status + fetched rows / total rows = 8/8 + +---------------------+------------+--------------+ + | @timestamp | status | avg(packets) | + |---------------------+------------+--------------| + | 2023-01-01 10:00:00 | active | 30.0 | + | 2023-01-01 10:00:00 | inactive | 30.0 | + | 2023-01-01 10:00:00 | pending | 60.0 | + | 2023-01-01 10:00:00 | processing | 60.0 | + | 2023-01-01 10:20:00 | cancelled | 180.0 | + | 2023-01-01 10:20:00 | completed | 60.0 | + | 2023-01-01 10:20:00 | inactive | 90.0 | + | 2023-01-01 10:20:00 | pending | 30.0 | + +---------------------+------------+--------------+ + +Example 5: Count events by hour and category ===================================================================== -This example counts events for each second and groups them by region, showing zero values for time-region combinations with no data. 
+This example counts events for each second and groups them by category PPL query:: - PPL> source=events | timechart span=1s count() by region - fetched rows / total rows = 15/15 - +---------------------+---------+-------+ - | @timestamp | region | count | - +---------------------+---------+-------+ - | 2024-07-01 00:00:00 | us-east | 1 | - | 2024-07-01 00:00:00 | us-west | 0 | - | 2024-07-01 00:00:00 | eu-west | 0 | - | 2024-07-01 00:01:00 | us-east | 0 | - | 2024-07-01 00:01:00 | us-west | 1 | - | 2024-07-01 00:01:00 | eu-west | 0 | - | 2024-07-01 00:02:00 | us-east | 1 | - | 2024-07-01 00:02:00 | us-west | 0 | - | 2024-07-01 00:02:00 | eu-west | 0 | - | 2024-07-01 00:03:00 | us-east | 0 | - | 2024-07-01 00:03:00 | us-west | 0 | - | 2024-07-01 00:03:00 | eu-west | 1 | - | 2024-07-01 00:04:00 | us-east | 0 | - | 2024-07-01 00:04:00 | us-west | 1 | - | 2024-07-01 00:04:00 | eu-west | 0 | - +---------------------+---------+-------+ + os> source=events | timechart span=1h count() by category + fetched rows / total rows = 2/2 + +---------------------+----------+---------+ + | @timestamp | category | count() | + |---------------------+----------+---------| + | 2023-01-01 10:00:00 | orders | 4 | + | 2023-01-01 10:00:00 | users | 4 | + +---------------------+----------+---------+ Example 6: Using the limit parameter with count() function ========================================================== @@ -245,27 +230,19 @@ This query will display the top 2 hosts with the highest count values, and group PPL query:: os> source=events | timechart span=1m limit=2 count() by host - fetched rows / total rows = 16/16 - +---------------------+---------+-------+ - | @timestamp | host | count | - |---------------------+---------+-------| - | 2023-01-01 10:00:00 | server1 | 1 | - | 2023-01-01 10:00:00 | server2 | 0 | - | 2023-01-01 10:05:00 | server1 | 0 | - | 2023-01-01 10:05:00 | server2 | 1 | - | 2023-01-01 10:10:00 | server1 | 1 | - | 2023-01-01 10:10:00 | server2 | 0 | - | 2023-01-01 10:15:00 | server1 | 0 | - | 2023-01-01 10:15:00 | server2 | 1 | - | 2023-01-01 10:20:00 | server1 | 1 | - | 2023-01-01 10:20:00 | server2 | 0 | - | 2023-01-01 10:25:00 | server1 | 0 | - | 2023-01-01 10:25:00 | server2 | 1 | - | 2023-01-01 10:30:00 | server1 | 1 | - | 2023-01-01 10:30:00 | server2 | 0 | - | 2023-01-01 10:35:00 | server1 | 0 | - | 2023-01-01 10:35:00 | server2 | 1 | - +---------------------+---------+-------+ + fetched rows / total rows = 8/8 + +---------------------+---------+---------+ + | @timestamp | host | count() | + |---------------------+---------+---------| + | 2023-01-01 10:00:00 | server1 | 1 | + | 2023-01-01 10:05:00 | server2 | 1 | + | 2023-01-01 10:10:00 | server1 | 1 | + | 2023-01-01 10:15:00 | server2 | 1 | + | 2023-01-01 10:20:00 | server1 | 1 | + | 2023-01-01 10:25:00 | server2 | 1 | + | 2023-01-01 10:30:00 | server1 | 1 | + | 2023-01-01 10:35:00 | server2 | 1 | + +---------------------+---------+---------+ Example 7: Using limit=0 with count() to show all values ======================================================== @@ -274,23 +251,23 @@ To display all distinct values without any limit, set limit=0: PPL query:: - PPL> source=events_many_hosts | timechart span=1h limit=0 count() by host + os> source=events_many_hosts | timechart span=1h limit=0 count() by host fetched rows / total rows = 11/11 - +---------------------+--------+-------+ - | @timestamp | host | count | - +---------------------+--------+-------+ - | 2024-07-01 00:00:00 | web-01 | 1 | - | 2024-07-01 00:00:00 | web-02 | 1 | - | 
Example 7: Using limit=0 with count() to show all values ======================================================== @@ -274,23 +251,23 @@ To display all distinct values without any limit, set limit=0: PPL query:: - PPL> source=events_many_hosts | timechart span=1h limit=0 count() by host + os> source=events_many_hosts | timechart span=1h limit=0 count() by host fetched rows / total rows = 11/11 - +---------------------+--------+-------+ - | @timestamp | host | count | - +---------------------+--------+-------+ - | 2024-07-01 00:00:00 | web-01 | 1 | - | 2024-07-01 00:00:00 | web-02 | 1 | - | 2024-07-01 00:00:00 | web-03 | 1 | - | 2024-07-01 00:00:00 | web-04 | 1 | - | 2024-07-01 00:00:00 | web-05 | 1 | - | 2024-07-01 00:00:00 | web-06 | 1 | - | 2024-07-01 00:00:00 | web-07 | 1 | - | 2024-07-01 00:00:00 | web-08 | 1 | - | 2024-07-01 00:00:00 | web-09 | 1 | - | 2024-07-01 00:00:00 | web-10 | 1 | - | 2024-07-01 00:00:00 | web-11 | 1 | - +---------------------+--------+-------+ + +---------------------+--------+---------+ + | @timestamp | host | count() | + |---------------------+--------+---------| + | 2024-07-01 00:00:00 | web-01 | 1 | + | 2024-07-01 00:00:00 | web-02 | 1 | + | 2024-07-01 00:00:00 | web-03 | 1 | + | 2024-07-01 00:00:00 | web-04 | 1 | + | 2024-07-01 00:00:00 | web-05 | 1 | + | 2024-07-01 00:00:00 | web-06 | 1 | + | 2024-07-01 00:00:00 | web-07 | 1 | + | 2024-07-01 00:00:00 | web-08 | 1 | + | 2024-07-01 00:00:00 | web-09 | 1 | + | 2024-07-01 00:00:00 | web-10 | 1 | + | 2024-07-01 00:00:00 | web-11 | 1 | + +---------------------+--------+---------+ This shows all 11 hosts as separate rows without an "OTHER" category. @@ -301,22 +278,22 @@ Limit to top 10 hosts without OTHER category (useother=false): PPL query:: - PPL> source=events_many_hosts | timechart span=1h useother=false count() by host + os> source=events_many_hosts | timechart span=1h useother=false count() by host fetched rows / total rows = 10/10 - +---------------------+--------+-------+ - | @timestamp | host | count | - +---------------------+--------+-------+ - | 2024-07-01 00:00:00 | web-01 | 1 | - | 2024-07-01 00:00:00 | web-02 | 1 | - | 2024-07-01 00:00:00 | web-03 | 1 | - | 2024-07-01 00:00:00 | web-04 | 1 | - | 2024-07-01 00:00:00 | web-05 | 1 | - | 2024-07-01 00:00:00 | web-06 | 1 | - | 2024-07-01 00:00:00 | web-07 | 1 | - | 2024-07-01 00:00:00 | web-08 | 1 | - | 2024-07-01 00:00:00 | web-09 | 1 | - | 2024-07-01 00:00:00 | web-10 | 1 | - +---------------------+--------+-------+ + +---------------------+--------+---------+ + | @timestamp | host | count() | + |---------------------+--------+---------| + | 2024-07-01 00:00:00 | web-01 | 1 | + | 2024-07-01 00:00:00 | web-02 | 1 | + | 2024-07-01 00:00:00 | web-03 | 1 | + | 2024-07-01 00:00:00 | web-04 | 1 | + | 2024-07-01 00:00:00 | web-05 | 1 | + | 2024-07-01 00:00:00 | web-06 | 1 | + | 2024-07-01 00:00:00 | web-07 | 1 | + | 2024-07-01 00:00:00 | web-08 | 1 | + | 2024-07-01 00:00:00 | web-09 | 1 | + | 2024-07-01 00:00:00 | web-10 | 1 | + +---------------------+--------+---------+ Example 9: Using limit with useother parameter and avg() function ================================================================= @@ -325,48 +302,49 @@ Limit to top 3 hosts with OTHER category (default useother=true): PPL query:: - PPL> source=events_many_hosts | timechart span=1h limit=3 avg(cpu_usage) by host + os> source=events_many_hosts | timechart span=1h limit=3 avg(cpu_usage) by host fetched rows / total rows = 4/4 - +---------------------+--------+------------------+ - | @timestamp | host | avg(cpu_usage) | - +---------------------+--------+------------------+ - | 2024-07-01 00:00:00 | web-03 | 55.3 | - | 2024-07-01 00:00:00 | web-07 | 48.6 | - | 2024-07-01 00:00:00 | web-09 | 67.8 | - | 2024-07-01 00:00:00 | OTHER | 330.4 | - +---------------------+--------+------------------+ + +---------------------+--------+----------------+ + | @timestamp | host | avg(cpu_usage) | + |---------------------+--------+----------------| + | 2024-07-01 00:00:00 | OTHER | 41.3 | + | 2024-07-01 00:00:00 | web-03 | 55.3 | + | 2024-07-01 00:00:00 | web-07 | 48.6 | + | 2024-07-01
00:00:00 | web-09 | 67.8 | + +---------------------+--------+----------------+ Limit to top 3 hosts without OTHER category (useother=false): PPL query:: - PPL> source=events_many_hosts | timechart span=1h limit=3 useother=false avg(cpu_usage) by host + os> source=events_many_hosts | timechart span=1h limit=3 useother=false avg(cpu_usage) by host fetched rows / total rows = 3/3 - +---------------------+--------+------------------+ - | @timestamp | host | avg(cpu_usage) | - +---------------------+--------+------------------+ - | 2024-07-01 00:00:00 | web-03 | 55.3 | - | 2024-07-01 00:00:00 | web-07 | 48.6 | - | 2024-07-01 00:00:00 | web-09 | 67.8 | - +---------------------+--------+------------------+ + +---------------------+--------+----------------+ + | @timestamp | host | avg(cpu_usage) | + |---------------------+--------+----------------| + | 2024-07-01 00:00:00 | web-03 | 55.3 | + | 2024-07-01 00:00:00 | web-07 | 48.6 | + | 2024-07-01 00:00:00 | web-09 | 67.8 | + +---------------------+--------+----------------+ Example 10: Handling null values in the "by" field ================================================== This example shows how null values in the "by" field are treated as a separate category. The dataset events_null has 1 entry that does not have a host field. +That entry is placed in a separate "NULL" category because the defaults for ``usenull`` and ``nullstr`` are ``true`` and ``"NULL"``, respectively. PPL query:: - PPL> source=events_null | timechart span=1h count() by host + os> source=events_null | timechart span=1h count() by host fetched rows / total rows = 4/4 - +---------------------+--------+-------+ - | @timestamp | host | count | - +---------------------+--------+-------+ - | 2024-07-01 00:00:00 | db-01 | 1 | - | 2024-07-01 00:00:00 | web-01 | 2 | - | 2024-07-01 00:00:00 | web-02 | 2 | - | 2024-07-01 00:00:00 | null | 1 | - +---------------------+--------+-------+ + +---------------------+--------+---------+ + | @timestamp | host | count() | + |---------------------+--------+---------| + | 2024-07-01 00:00:00 | NULL | 1 | + | 2024-07-01 00:00:00 | db-01 | 1 | + | 2024-07-01 00:00:00 | web-01 | 2 | + | 2024-07-01 00:00:00 | web-02 | 2 | + +---------------------+--------+---------+
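+ +The ``"NULL"`` label itself is configurable. As an illustrative, non-executed sketch (assuming ``nullstr`` is passed like the other timechart parameters shown above):: + + PPL> source=events_null | timechart span=1h nullstr='no_host' count() by host + +Events without a ``host`` value would then appear under ``no_host`` instead of ``NULL``.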
+{"@timestamp":"2024-07-01T00:00:00","host":"web-11","cpu_usage":43.1} diff --git a/doctest/test_data/events_null.json b/doctest/test_data/events_null.json new file mode 100644 index 00000000000..d5e030621db --- /dev/null +++ b/doctest/test_data/events_null.json @@ -0,0 +1,12 @@ +{"index":{"_id":"1"}} +{"@timestamp":"2024-07-01T00:00:00","host":"web-01","cpu_usage":45.2,"region":"us-east"} +{"index":{"_id":"2"}} +{"@timestamp":"2024-07-01T00:01:00","host":"web-02","cpu_usage":38.7,"region":"us-west"} +{"index":{"_id":"3"}} +{"@timestamp":"2024-07-01T00:02:00","host":"web-01","cpu_usage":55.3,"region":"us-east"} +{"index":{"_id":"4"}} +{"@timestamp":"2024-07-01T00:03:00","host":"db-01","cpu_usage":42.1,"region":"eu-west"} +{"index":{"_id":"5"}} +{"@timestamp":"2024-07-01T00:04:00","host":"web-02","cpu_usage":41.8,"region":"us-west"} +{"index":{"_id":"6"}} +{"@timestamp":"2024-07-01T00:05:00","cpu_usage":50.0,"region":"us-east"} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index c2d1112b584..4fd9c230ff6 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -41,6 +41,8 @@ 'worker': 'worker.json', 'work_information': 'work_information.json', 'events': 'events.json', + 'events_null': 'events_null.json', + 'events_many_hosts': 'events_many_hosts.json', 'otellogs': 'otellogs.json', 'time_data': 'time_test_data.json', 'time_data2': 'time_test_data2.json', diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java index b325912e7d7..e687751ef0c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java @@ -169,8 +169,8 @@ public void testChartLimit0WithUseOther() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | chart limit=0 useother=true otherstr='max_among_other'" - + " max(severityNumber) over flags by severityText", + "source=%s | chart max(severityNumber) over flags by severityText limit=0" + + " useother=true otherstr='max_among_other'", TEST_INDEX_OTEL_LOGS)); verifySchema( result, @@ -250,7 +250,8 @@ public void testChartLimitTopWithMinAgg() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | chart limit=top2 min(severityNumber) over flags by severityText", + "source=%s | chart limit=top2 min(severityNumber) over flags by severityText" + + " useother=true otherstr='OTHER'", TEST_INDEX_OTEL_LOGS)); verifySchema( result, @@ -306,8 +307,8 @@ public void testChartUseNullFalseWithNullStr() throws IOException { JSONObject result = executeQuery( String.format( - "source=%s | chart usenull=false nullstr='not_shown' count() over gender by age" - + " span=10", + "source=%s | chart usenull=false count() over gender by age " + + " span=10 nullstr='not_shown'", TEST_INDEX_BANK_WITH_NULL_VALUES)); verifySchema( result, schema("gender", "string"), schema("age", "string"), schema("count()", "bigint")); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java index 4d9352e9e87..3b5c5f55475 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java @@ -5,9 +5,8 @@ package 
org.opensearch.sql.calcite.remote; -import static org.junit.jupiter.api.Assertions.*; import static org.opensearch.sql.legacy.TestUtils.*; -import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.util.MatcherUtils.*; import java.io.IOException; @@ -25,9 +24,10 @@ public void init() throws Exception { disallowCalciteFallback(); // Create events index with timestamp data - createEventsIndex(); + loadIndex(Index.BANK); + loadIndex(Index.EVENTS); + loadIndex(Index.EVENTS_NULL); createEventsManyHostsIndex(); - createEventsNullIndex(); } @Test @@ -37,13 +37,12 @@ public void testTimechartWithHourSpanAndGroupBy() throws IOException { result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); + schema("count()", "bigint")); verifyDataRows( result, rows("2024-07-01 00:00:00", "db-01", 1), rows("2024-07-01 00:00:00", "web-01", 2), rows("2024-07-01 00:00:00", "web-02", 2)); - assertEquals(3, result.getInt("total")); } @Test @@ -53,51 +52,26 @@ public void testTimechartWithMinuteSpanAndGroupBy() throws IOException { result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); - - // For count aggregation with default limit (no OTHER needed): 3 hosts × 5 time spans = 15 rows - assertEquals(15, result.getInt("total")); + schema("count()", "bigint")); verifyDataRows( result, - rows("2024-07-01 00:00:00", "db-01", 0), rows("2024-07-01 00:00:00", "web-01", 1), - rows("2024-07-01 00:00:00", "web-02", 0), - rows("2024-07-01 00:01:00", "db-01", 0), - rows("2024-07-01 00:01:00", "web-01", 0), rows("2024-07-01 00:01:00", "web-02", 1), - rows("2024-07-01 00:02:00", "db-01", 0), rows("2024-07-01 00:02:00", "web-01", 1), - rows("2024-07-01 00:02:00", "web-02", 0), rows("2024-07-01 00:03:00", "db-01", 1), - rows("2024-07-01 00:03:00", "web-01", 0), - rows("2024-07-01 00:03:00", "web-02", 0), - rows("2024-07-01 00:04:00", "db-01", 0), - rows("2024-07-01 00:04:00", "web-01", 0), rows("2024-07-01 00:04:00", "web-02", 1)); } @Test public void testTimechartWithoutTimestampField() throws IOException { - // Create index without @timestamp field - String noTimestampMapping = - "{\"mappings\":{\"properties\":{\"name\":{\"type\":\"keyword\"},\"occupation\":{\"type\":\"keyword\"},\"country\":{\"type\":\"keyword\"},\"salary\":{\"type\":\"integer\"},\"year\":{\"type\":\"integer\"},\"month\":{\"type\":\"integer\"}}}}"; - if (!isIndexExist(client(), "no_timestamp")) { - createIndexByRestClient(client(), "no_timestamp", noTimestampMapping); - loadDataByRestClient(client(), "no_timestamp", "src/test/resources/occupation.json"); - } - - // Test should throw exception for missing @timestamp field Throwable exception = - assertThrowsWithReplace( + assertThrows( ResponseException.class, () -> { - executeQuery("source=no_timestamp | timechart count()"); + executeQuery(String.format("source=%s | timechart count()", TEST_INDEX_BANK)); }); - assertTrue( - "Error message should mention missing @timestamp field", - exception.getMessage().contains("@timestamp") - || exception.getMessage().contains("timestamp")); + verifyErrorMessageContains(exception, "Field [@timestamp] not found."); } @Test @@ -111,7 +85,6 @@ public void testTimechartWithMinuteSpanNoGroupBy() throws IOException { rows("2024-07-01 00:02:00", 55.3), rows("2024-07-01 00:03:00", 42.1), rows("2024-07-01 00:04:00", 41.8)); - assertEquals(5, result.getInt("total")); } @Test @@ -121,28 +94,15 @@ 
public void testTimechartWithSpanCountGroupBy() throws IOException { result, schema("@timestamp", "timestamp"), schema("region", "string"), - schema("count", "bigint")); - // For count aggregation with 3 regions (< default limit 10), should show zero-filled results: 3 - // regions × 5 time spans = 15 rows - assertEquals(15, result.getInt("total")); + schema("count()", "bigint")); verifyDataRows( result, rows("2024-07-01 00:00:00", "us-east", 1), - rows("2024-07-01 00:00:00", "us-west", 0), - rows("2024-07-01 00:00:00", "eu-west", 0), - rows("2024-07-01 00:01:00", "us-east", 0), rows("2024-07-01 00:01:00", "us-west", 1), - rows("2024-07-01 00:01:00", "eu-west", 0), rows("2024-07-01 00:02:00", "us-east", 1), - rows("2024-07-01 00:02:00", "us-west", 0), - rows("2024-07-01 00:02:00", "eu-west", 0), - rows("2024-07-01 00:03:00", "us-east", 0), - rows("2024-07-01 00:03:00", "us-west", 0), rows("2024-07-01 00:03:00", "eu-west", 1), - rows("2024-07-01 00:04:00", "us-east", 0), - rows("2024-07-01 00:04:00", "us-west", 1), - rows("2024-07-01 00:04:00", "eu-west", 0)); + rows("2024-07-01 00:04:00", "us-west", 1)); } @Test @@ -157,19 +117,8 @@ public void testTimechartWithOtherCategory() throws IOException { schema("avg(cpu_usage)", "double")); // Verify we have 11 data rows (10 hosts + OTHER) - assertEquals(11, result.getJSONArray("datarows").length()); - - // Verify the OTHER row exists with the correct value - boolean foundOther = false; - for (int i = 0; i < result.getJSONArray("datarows").length(); i++) { - Object[] row = result.getJSONArray("datarows").getJSONArray(i).toList().toArray(); - if ("OTHER".equals(row[1])) { - foundOther = true; - assertEquals(35.9, ((Number) row[2]).doubleValue(), 0.01); - break; - } - } - assertTrue("OTHER category not found in results", foundOther); + verifyNumOfRows(result, 11); + verifyDataRowsSome(result, rows("2024-07-01 00:00:00", "OTHER", 35.9)); } @Test @@ -183,13 +132,13 @@ public void testTimechartWithLimit() throws IOException { schema("host", "string"), schema("avg(cpu_usage)", "double")); - verifyDataRows( + verifyDataRowsInOrder( result, rows("2024-07-01 00:00:00", "web-01", 45.2), - rows("2024-07-01 00:01:00", "OTHER", 38.7), + rows("2024-07-01 00:01:00", "web-02", 38.7), rows("2024-07-01 00:02:00", "web-01", 55.3), - rows("2024-07-01 00:03:00", "db-01", 42.1), - rows("2024-07-01 00:04:00", "OTHER", 41.8)); + rows("2024-07-01 00:03:00", "OTHER", 42.1), + rows("2024-07-01 00:04:00", "web-02", 41.8)); } @Test @@ -200,29 +149,15 @@ public void testTimechartWithLimitCountGroupBy() throws IOException { result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); - - // For count with limit=2, should show zero-filled results: 3 hosts (web-01, web-02, OTHER) × 5 - // time spans = 15 rows - assertEquals(15, result.getInt("total")); + schema("count()", "bigint")); verifyDataRows( result, rows("2024-07-01 00:00:00", "web-01", 1), - rows("2024-07-01 00:00:00", "web-02", 0), - rows("2024-07-01 00:00:00", "OTHER", 0), - rows("2024-07-01 00:01:00", "web-01", 0), rows("2024-07-01 00:01:00", "web-02", 1), - rows("2024-07-01 00:01:00", "OTHER", 0), rows("2024-07-01 00:02:00", "web-01", 1), - rows("2024-07-01 00:02:00", "web-02", 0), - rows("2024-07-01 00:02:00", "OTHER", 0), - rows("2024-07-01 00:03:00", "web-01", 0), - rows("2024-07-01 00:03:00", "web-02", 0), rows("2024-07-01 00:03:00", "OTHER", 1), - rows("2024-07-01 00:04:00", "web-01", 0), - rows("2024-07-01 00:04:00", "web-02", 1), - rows("2024-07-01 00:04:00", "OTHER", 0)); 
+ rows("2024-07-01 00:04:00", "web-02", 1)); } @Test @@ -239,7 +174,7 @@ public void testTimechartWithLimitZeroAndAvg() throws IOException { schema("avg(cpu_usage)", "double")); // Verify we have 11 data rows (all 11 hosts, no OTHER) - assertEquals(11, result.getJSONArray("datarows").length()); + verifyNumOfRows(result, 11); // Verify no OTHER category boolean foundOther = false; @@ -263,10 +198,9 @@ public void testTimechartWithLimitZeroAndCount() throws IOException { result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); + schema("count()", "bigint")); - // For count with limit=0, should show zero-filled results: 11 hosts × 1 time span = 11 rows - assertEquals(11, result.getInt("total")); + verifyNumOfRows(result, 11); } @Test @@ -282,7 +216,7 @@ public void testTimechartWithUseOtherFalseAndAvg() throws IOException { schema("avg(cpu_usage)", "double")); // Verify we have 10 data rows (top 10 hosts, no OTHER) - assertEquals(10, result.getJSONArray("datarows").length()); + verifyNumOfRows(result, 10); // Verify no OTHER category boolean foundOther = false; @@ -305,11 +239,9 @@ public void testTimechartWithUseOtherFalseAndCount() throws IOException { result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); + schema("count()", "bigint")); - // For count with useother=false, should show zero-filled results: 10 hosts × 1 time span = 10 - // rows - assertEquals(10, result.getInt("total")); + verifyNumOfRows(result, 10); } @Test @@ -320,28 +252,15 @@ public void testTimechartWithCountNoLimitByHostShowZero() throws IOException { result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); - - // For count aggregation, should show zero-filled results: 3 hosts × 5 time spans = 15 rows - assertEquals(15, result.getInt("total")); + schema("count()", "bigint")); verifyDataRows( result, rows("2024-07-01 00:00:00", "web-01", 1), - rows("2024-07-01 00:00:00", "web-02", 0), - rows("2024-07-01 00:00:00", "db-01", 0), - rows("2024-07-01 00:01:00", "web-01", 0), rows("2024-07-01 00:01:00", "web-02", 1), - rows("2024-07-01 00:01:00", "db-01", 0), rows("2024-07-01 00:02:00", "web-01", 1), - rows("2024-07-01 00:02:00", "web-02", 0), - rows("2024-07-01 00:02:00", "db-01", 0), - rows("2024-07-01 00:03:00", "web-01", 0), - rows("2024-07-01 00:03:00", "web-02", 0), rows("2024-07-01 00:03:00", "db-01", 1), - rows("2024-07-01 00:04:00", "web-01", 0), - rows("2024-07-01 00:04:00", "web-02", 1), - rows("2024-07-01 00:04:00", "db-01", 0)); + rows("2024-07-01 00:04:00", "web-02", 1)); } @Test @@ -357,106 +276,68 @@ public void testTimechartWithLimitAndUseOther() throws IOException { schema("host", "string"), schema("avg(cpu_usage)", "double")); - // Verify we have 4 data rows (3 hosts + OTHER) - assertEquals(4, result.getJSONArray("datarows").length()); - - // Verify specific values with tolerance for floating point precision - boolean foundOther = false, foundWeb03 = false, foundWeb07 = false, foundWeb09 = false; - for (int i = 0; i < result.getJSONArray("datarows").length(); i++) { - Object[] row = result.getJSONArray("datarows").getJSONArray(i).toList().toArray(); - String host = (String) row[1]; - double cpuUsage = ((Number) row[2]).doubleValue(); - - if ("OTHER".equals(host)) { - foundOther = true; - assertEquals(41.3, cpuUsage, 0.1); - } else if ("web-03".equals(host)) { - foundWeb03 = true; - assertEquals(55.3, cpuUsage, 0.1); - } else if ("web-07".equals(host)) { - foundWeb07 = true; - 
assertEquals(48.6, cpuUsage, 0.1); - } else if ("web-09".equals(host)) { - foundWeb09 = true; - assertEquals(67.8, cpuUsage, 0.1); - } - } - assertTrue("OTHER not found", foundOther); - assertTrue("web-03 not found", foundWeb03); - assertTrue("web-07 not found", foundWeb07); - assertTrue("web-09 not found", foundWeb09); + verifyDataRows( + result, + closeTo("2024-07-01 00:00:00", "OTHER", 41.300000000000004), + closeTo("2024-07-01 00:00:00", "web-03", 55.3), + closeTo("2024-07-01 00:00:00", "web-07", 48.6), + closeTo("2024-07-01 00:00:00", "web-09", 67.8)); } @Test public void testTimechartWithMissingHostValues() throws IOException { - createEventsNullIndex(); - JSONObject result = executeQuery("source=events_null | timechart span=1d count() by host"); verifySchema( result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); + schema("count()", "bigint")); verifyDataRows( result, rows("2024-07-01 00:00:00", "db-01", 1), rows("2024-07-01 00:00:00", "web-01", 2), rows("2024-07-01 00:00:00", "web-02", 2), - rows("2024-07-01 00:00:00", null, 1)); - - assertEquals(4, result.getInt("total")); + rows("2024-07-01 00:00:00", "NULL", 1)); } @Test public void testTimechartWithNullAndOther() throws IOException { - createEventsNullIndex(); - JSONObject result = - executeQuery("source=events_null | timechart span=1d limit=2 count() by host"); + executeQuery("source=events_null | timechart count() by host span=1d limit=2"); verifySchema( result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); + schema("count()", "bigint")); verifyDataRows( result, rows("2024-07-01 00:00:00", "OTHER", 1), rows("2024-07-01 00:00:00", "web-01", 2), rows("2024-07-01 00:00:00", "web-02", 2), - rows("2024-07-01 00:00:00", null, 1)); - - assertEquals(4, result.getInt("total")); + rows("2024-07-01 00:00:00", "NULL", 1)); } @Test public void testTimechartWithNullAndLimit() throws IOException { - createEventsNullIndex(); - JSONObject result = - executeQuery("source=events_null | timechart span=1d limit=3 count() by host"); + executeQuery("source=events_null | timechart span=1d count() by host limit=3"); verifySchema( result, schema("@timestamp", "timestamp"), schema("host", "string"), - schema("count", "bigint")); + schema("count()", "bigint")); verifyDataRows( result, rows("2024-07-01 00:00:00", "db-01", 1), rows("2024-07-01 00:00:00", "web-01", 2), rows("2024-07-01 00:00:00", "web-02", 2), - rows("2024-07-01 00:00:00", null, 1)); - - assertEquals(4, result.getInt("total")); - } - - private void createEventsIndex() throws IOException { - loadIndex(Index.EVENTS); + rows("2024-07-01 00:00:00", "NULL", 1)); } private void createEventsManyHostsIndex() throws IOException { @@ -468,13 +349,4 @@ private void createEventsManyHostsIndex() throws IOException { client(), "events_many_hosts", "src/test/resources/events_many_hosts.json"); } } - - private void createEventsNullIndex() throws IOException { - String eventsMapping = - "{\"mappings\":{\"properties\":{\"@timestamp\":{\"type\":\"date\"},\"host\":{\"type\":\"text\"},\"cpu_usage\":{\"type\":\"double\"},\"region\":{\"type\":\"keyword\"}}}}"; - if (!isIndexExist(client(), "events_null")) { - createIndexByRestClient(client(), "events_null", eventsMapping); - loadDataByRestClient(client(), "events_null", "src/test/resources/events_null.json"); - } - } } diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index 
4e7d72ae530..fb5c0be026b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -184,6 +184,12 @@ public static void verifyDataRowsInOrder(JSONObject response, Matcher verifyInOrder(response.getJSONArray("datarows"), matchers); } + @SafeVarargs + @SuppressWarnings("unchecked") + public static void verifyDataRowsSome(JSONObject response, Matcher<JSONArray>... matchers) { + verifySome(response.getJSONArray("datarows"), matchers); + } + public static void verifyNumOfRows(JSONObject response, int numOfRow) { assertEquals(numOfRow, response.getJSONArray("datarows").length()); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml index a315860aac9..bd9187b4908 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart.yaml @@ -3,37 +3,38 @@ calcite: LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) - LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) + LogicalProject(@timestamp=[$0], host=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(cpu_usage)=[$2]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) - LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) - LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) - LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) - LogicalFilter(condition=[IS NOT NULL($1)]) - LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) - LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) - LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + LogicalProject(@timestamp=[$1], host=[$0], avg(cpu_usage)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(cpu_usage)=[AVG($1)]) + LogicalProject(host=[$4], cpu_usage=[$7], @timestamp0=[SPAN($1, 1, 'm')]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + LogicalProject(host=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(host=[$0], avg(cpu_usage)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(cpu_usage)=[AVG($1)]) + LogicalProject(host=[$4], cpu_usage=[$7], @timestamp0=[SPAN($1, 1, 'm')]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'],
expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], avg(cpu_usage)=[$t8]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t0, $t3, $t4)], host=[$t1], cpu_usage=[$t2], @timestamp0=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host, cpu_usage], FILTER->AND(IS NOT NULL($0), IS NOT NULL($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"cpu_usage","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp","host","cpu_usage"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) - EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], $f2=[$t8]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t2, $t3, $t4)], proj#0..1=[{exprs}], $f2=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, cpu_usage, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","cpu_usage","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..2=[{inputs}], host=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($1)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], host=[$t0], 
avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=['m'], expr#5=[SPAN($t0, $t3, $t4)], host=[$t1], cpu_usage=[$t2], @timestamp0=[$t5]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host, cpu_usage], FILTER->AND(IS NOT NULL($0), IS NOT NULL($2)), FILTER->IS NOT NULL($1)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"filter":[{"bool":{"must":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"cpu_usage","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"host","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp","host","cpu_usage"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml index adba9c12202..17d8b0f917b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_timechart_count.yaml @@ -2,70 +2,35 @@ calcite: logical: | LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], count=[SUM($2)]) - LogicalUnion(all=[false]) - LogicalAggregate(group=[{0, 1}], actual_count=[SUM($2)]) - LogicalProject(@timestamp=[CAST($0):TIMESTAMP(0) NOT NULL], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], count=[$2]) - LogicalJoin(condition=[IS NOT DISTINCT FROM($1, $3)], joinType=[left]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) - LogicalFilter(condition=[IS NOT NULL($1)]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalProject(@timestamp=[CAST($0):TIMESTAMP(0) NOT NULL], host=[$1], count=[0]) - LogicalJoin(condition=[true], joinType=[inner]) - LogicalAggregate(group=[{0}]) - LogicalProject(@timestamp=[$1]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalAggregate(group=[{0}]) - LogicalProject($f0=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))]) - LogicalJoin(condition=[IS NOT DISTINCT FROM($1, $3)], joinType=[left]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) + LogicalAggregate(group=[{0, 1}], count()=[SUM($2)]) + LogicalProject(@timestamp=[$0], host=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], count()=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + 
LogicalProject(@timestamp=[$1], host=[$0], count()=[$2]) + LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) + LogicalProject(host=[$4], @timestamp0=[SPAN($1, 1, 'm')]) + LogicalFilter(condition=[IS NOT NULL($1)]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + LogicalProject(host=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(host=[$0], count()=[$2]) + LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) + LogicalProject(host=[$4], @timestamp0=[SPAN($1, 1, 'm')]) LogicalFilter(condition=[IS NOT NULL($1)]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], count=[$SUM0($2)]) - EnumerableUnion(all=[false]) - EnumerableAggregate(group=[{0, 1}], actual_count=[$SUM0($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[CAST($t0):TIMESTAMP(0) NOT NULL], expr#6=[IS NOT NULL($t3)], expr#7=[IS NULL($t1)], expr#8=[null:NULL], expr#9=['OTHER'], expr#10=[CASE($t7, $t8, $t9)], expr#11=[CASE($t6, $t1, $t10)], @timestamp=[$t5], host=[$t11], count=[$t2]) - EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($1, $3)], joinType=[left]) - EnumerableCalc(expr#0..2=[{inputs}], @timestamp=[$t1], host=[$t0], $f2_0=[$t2]) - EnumerableAggregate(group=[{0, 1}], agg#0=[COUNT()]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=['m'], expr#4=[SPAN($t1, $t2, $t3)], host=[$t0], $f1=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[COUNT()]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[CAST($t0):TIMESTAMP(0) NOT NULL], expr#3=[0], @timestamp=[$t2], host=[$t1], count=[$t3]) - EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1})], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"$f2":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableAggregate(group=[{0}]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t1)], expr#4=[IS NULL($t0)], expr#5=[null:NULL], expr#6=['OTHER'], expr#7=[CASE($t4, $t5, $t6)], expr#8=[CASE($t3, $t0, $t7)], $f0=[$t8]) - 
EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[left]) - EnumerableCalc(expr#0..1=[{inputs}], host=[$t0]) - EnumerableAggregate(group=[{0, 1}]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=['m'], expr#4=[SPAN($t1, $t2, $t3)], host=[$t0], $f1=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["host","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{0}], grand_total=[COUNT()]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[host, @timestamp], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"host","boost":1.0}},"_source":{"includes":["host","@timestamp"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableAggregate(group=[{0, 1}], count()=[$SUM0($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], @timestamp=[$t0], host=[$t10], count()=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], @timestamp=[$t1], host=[$t0], count()=[$t2]) + EnumerableAggregate(group=[{0, 1}], count()=[COUNT()]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=['m'], expr#4=[SPAN($t0, $t2, $t3)], host=[$t1], @timestamp0=[$t4]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host], FILTER->IS NOT NULL($0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"_source":{"includes":["@timestamp","host"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], host=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{1}], __grand_total__=[COUNT()]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[PROJECT->[@timestamp, host], FILTER->IS NOT NULL($0), FILTER->IS NOT NULL($1)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"bool":{"filter":[{"exists":{"field":"@timestamp","boost":1.0}},{"exists":{"field":"host","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp","host"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml index 5aa55ca656b..e982ce038e2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart.yaml @@ -3,37 +3,38 @@ calcite: LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) 
LogicalAggregate(group=[{0, 1}], avg(cpu_usage)=[AVG($2)]) - LogicalProject(@timestamp=[$0], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], avg(cpu_usage)=[$2]) + LogicalProject(@timestamp=[$0], host=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(cpu_usage)=[$2]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) - LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) - LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) - LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[AVG($2)]) - LogicalFilter(condition=[IS NOT NULL($1)]) - LogicalProject(@timestamp=[$1], host=[$0], $f2=[$2]) - LogicalAggregate(group=[{0, 2}], agg#0=[AVG($1)]) - LogicalProject(host=[$4], cpu_usage=[$7], $f3=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + LogicalProject(@timestamp=[$1], host=[$0], avg(cpu_usage)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(cpu_usage)=[AVG($1)]) + LogicalProject(host=[$4], cpu_usage=[$7], @timestamp0=[SPAN($1, 1, 'm')]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + LogicalProject(host=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(host=[$0], avg(cpu_usage)=[$2]) + LogicalAggregate(group=[{0, 2}], avg(cpu_usage)=[AVG($1)]) + LogicalProject(host=[$4], cpu_usage=[$7], @timestamp0=[SPAN($1, 1, 'm')]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t3)], expr#6=[IS NULL($t1)], expr#7=[null:NULL], expr#8=['OTHER'], expr#9=[CASE($t6, $t7, $t8)], expr#10=[CASE($t5, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], @timestamp=[$t0], host=[$t10], avg(cpu_usage)=[$t2]) EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) EnumerableSort(sort0=[$1], dir0=[ASC]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], $f2=[$t8]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], @timestamp=[$t1], host=[$t0], avg(cpu_usage)=[$t8]) EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], expr#19=[IS NOT NULL($t1)], expr#20=[IS NOT NULL($t7)], expr#21=[AND($t19, $t20)], 
host=[$t4], cpu_usage=[$t7], @timestamp0=[$t18], $condition=[$t21]) CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) EnumerableSort(sort0=[$0], dir0=[ASC]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], host=[$t0], grand_total=[$t7]) - EnumerableAggregate(group=[{0}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], expr#9=[IS NOT NULL($t0)], proj#0..1=[{exprs}], $f2=[$t8], $condition=[$t9]) - EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], cpu_usage=[$t7], $f3=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) + EnumerableCalc(expr#0..2=[{inputs}], host=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{0}], __grand_total__=[SUM($2)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(cpu_usage)=[$t8]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], expr#19=[IS NOT NULL($t1)], expr#20=[IS NOT NULL($t7)], expr#21=[IS NOT NULL($t4)], expr#22=[AND($t19, $t20, $t21)], host=[$t4], cpu_usage=[$t7], @timestamp0=[$t18], $condition=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart_count.yaml index e60799af17d..2979778506a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_timechart_count.yaml @@ -2,74 +2,36 @@ calcite: logical: | LogicalSystemLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - LogicalAggregate(group=[{0, 1}], count=[SUM($2)]) - LogicalUnion(all=[false]) - LogicalAggregate(group=[{0, 1}], actual_count=[SUM($2)]) - LogicalProject(@timestamp=[CAST($0):TIMESTAMP(0) NOT NULL], host=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))], count=[$2]) - LogicalJoin(condition=[IS NOT DISTINCT FROM($1, $3)], joinType=[left]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) - LogicalFilter(condition=[IS NOT NULL($1)]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalProject(@timestamp=[CAST($0):TIMESTAMP(0) NOT NULL], host=[$1], count=[0]) - 
LogicalJoin(condition=[true], joinType=[inner]) - LogicalAggregate(group=[{0}]) - LogicalProject(@timestamp=[$1]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalAggregate(group=[{0}]) - LogicalProject($f0=[CASE(IS NOT NULL($3), $1, CASE(IS NULL($1), null:NULL, 'OTHER'))]) - LogicalJoin(condition=[IS NOT DISTINCT FROM($1, $3)], joinType=[left]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) - LogicalSort(sort0=[$1], dir0=[DESC], fetch=[10]) - LogicalAggregate(group=[{1}], grand_total=[SUM($2)]) + LogicalAggregate(group=[{0, 1}], count()=[SUM($2)]) + LogicalProject(@timestamp=[$0], host=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], count()=[$2]) + LogicalJoin(condition=[=($1, $3)], joinType=[left]) + LogicalProject(@timestamp=[$1], host=[$0], count()=[$2]) + LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) + LogicalProject(host=[$4], @timestamp0=[SPAN($1, 1, 'm')]) + LogicalFilter(condition=[IS NOT NULL($1)]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + LogicalProject(host=[$0], __grand_total__=[$1], _row_number_chart_=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)]) + LogicalAggregate(group=[{0}], __grand_total__=[SUM($1)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(host=[$0], count()=[$2]) + LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) + LogicalProject(host=[$4], @timestamp0=[SPAN($1, 1, 'm')]) LogicalFilter(condition=[IS NOT NULL($1)]) - LogicalProject(@timestamp=[$1], host=[$0], $f2_0=[$2]) - LogicalAggregate(group=[{0, 1}], agg#0=[COUNT()]) - LogicalProject(host=[$4], $f2=[SPAN($1, 1, 'm')]) - CalciteLogicalIndexScan(table=[[OpenSearch, events]]) + CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC]) - EnumerableAggregate(group=[{0, 1}], count=[$SUM0($2)]) - EnumerableUnion(all=[false]) - EnumerableAggregate(group=[{0, 1}], actual_count=[$SUM0($2)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[CAST($t0):TIMESTAMP(0) NOT NULL], expr#6=[IS NOT NULL($t3)], expr#7=[IS NULL($t1)], expr#8=[null:NULL], expr#9=['OTHER'], expr#10=[CASE($t7, $t8, $t9)], expr#11=[CASE($t6, $t1, $t10)], @timestamp=[$t5], host=[$t11], count=[$t2]) - EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($1, $3)], joinType=[left]) - EnumerableCalc(expr#0..2=[{inputs}], @timestamp=[$t1], host=[$t0], $f2_0=[$t2]) - EnumerableAggregate(group=[{0, 1}], agg#0=[COUNT()]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], $f2=[$t18]) + EnumerableAggregate(group=[{0, 1}], count()=[$SUM0($2)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], @timestamp=[$t0], host=[$t10], count()=[$t2]) + EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left]) + EnumerableSort(sort0=[$1], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], @timestamp=[$t1], host=[$t0], count()=[$t2]) + EnumerableAggregate(group=[{0, 1}], count()=[COUNT()]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], expr#19=[IS NOT NULL($t1)], host=[$t4], @timestamp0=[$t18], 
$condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..2=[{inputs}], host=[$t0], $1=[$t2]) + EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableAggregate(group=[{4}], __grand_total__=[COUNT()]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[IS NOT NULL($t1)], expr#17=[IS NOT NULL($t4)], expr#18=[AND($t16, $t17)], proj#0..15=[{exprs}], $condition=[$t18]) CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{4}], grand_total=[COUNT()]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[IS NOT NULL($t4)], proj#0..15=[{exprs}], $condition=[$t16]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[CAST($t0):TIMESTAMP(0) NOT NULL], expr#3=[0], @timestamp=[$t2], host=[$t1], count=[$t3]) - EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) - EnumerableAggregate(group=[{1}]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], $f2=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) - EnumerableAggregate(group=[{0}]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t1)], expr#4=[IS NULL($t0)], expr#5=[null:NULL], expr#6=['OTHER'], expr#7=[CASE($t4, $t5, $t6)], expr#8=[CASE($t3, $t0, $t7)], $f0=[$t8]) - EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[left]) - EnumerableCalc(expr#0..1=[{inputs}], host=[$t0]) - EnumerableAggregate(group=[{0, 1}]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[1], expr#17=['m'], expr#18=[SPAN($t1, $t16, $t17)], host=[$t4], $f2=[$t18]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) - EnumerableLimit(fetch=[10]) - EnumerableSort(sort0=[$1], dir0=[DESC]) - EnumerableAggregate(group=[{4}], grand_total=[COUNT()]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[IS NOT NULL($t4)], proj#0..15=[{exprs}], $condition=[$t16]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]]) \ No newline at end of file diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml index 147ab899552..06ce51afbc8 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4550.yml @@ -41,7 +41,7 @@ setup: query: source=test_data_2023 | timechart span=500ms count() - match: { total: 5 } - - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "count", "type": "bigint" }] } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "count()", "type": "bigint" }] } - match: {"datarows": [["2023-10-08 10:00:00", 1], ["2023-10-08 10:00:00.5", 1], ["2023-10-08 10:00:01", 1], ["2023-10-08 10:00:01.5", 1], ["2023-10-08 10:00:02", 1]]} --- @@ -79,7 +79,7 @@ setup: query: source=test_data_2023 | timechart span=250milliseconds count() - match: { total: 5 } - - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "count", "type": "bigint" }] } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "count()", "type": "bigint" }] } --- "timechart with second span for comparison": @@ -97,5 +97,5 @@ setup: query: source=test_data_2023 | 
timechart span=1s count() - match: { total: 3 } - - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "count", "type": "bigint" }] } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "count()", "type": "bigint" }] } - match: {"datarows": [["2023-10-08 10:00:00", 2], ["2023-10-08 10:00:01", 2], ["2023-10-08 10:00:02", 1]]} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4581.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4581.yml new file mode 100644 index 00000000000..3ef89e9f90c --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4581.yml @@ -0,0 +1,53 @@ +setup: + - do: + indices.create: + index: opensearch-sql_test_index_otel_logs + body: + mappings: + properties: + "@timestamp": + type: date + "severityNumber": + type: integer + "flags": + type: long + - do: + bulk: + index: opensearch-sql_test_index_otel_logs + refresh: true + body: + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:00.000Z","severityNumber":10,"flags":0}' + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:01.000Z","severityNumber":12,"flags":0}' + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:02.000Z","severityNumber":15,"flags":0}' + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:03.000Z","severityNumber":13,"flags":0}' + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:04.000Z","severityNumber":11,"flags":0}' + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:05.000Z","severityNumber":14,"flags":0}' + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:06.000Z","severityNumber":16,"flags":0}' + - '{"index":{}}' + - '{"@timestamp":"2023-10-08T10:00:07.000Z","severityNumber":9,"flags":1}' + +--- +"timechart with grouping by non-string field": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=opensearch-sql_test_index_otel_logs | timechart avg(severityNumber) by flags + + - match: { total: 2 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "flags", "type": "string" }, { "name": "avg(severityNumber)", "type": "double" }] } + - match: {"datarows": [["2023-10-08 10:00:00", "0", 13.0], ["2023-10-08 10:00:00", "1", 9.0]]} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml new file mode 100644 index 00000000000..952c4cd6387 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4582.yml @@ -0,0 +1,52 @@ +setup: + - do: + indices.create: + index: opensearch-sql_test_index_otel_logs + body: + mappings: + properties: + "@timestamp": + type: date + "severityNumber": + type: long + "severityText": + type: keyword + - do: + bulk: + index: opensearch-sql_test_index_otel_logs + refresh: true + body: + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:00.000Z","severityNumber":24,"severityText":"FATAL4"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:01.000Z","severityNumber":24,"severityText":"FATAL4"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:02.000Z","severityNumber":23,"severityText":"ERROR5"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:03.000Z","severityNumber":22,"severityText":"ERROR6"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:04.000Z","severityNumber":21,"severityText":"WARN7"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:05.000Z","severityNumber":20,"severityText":"INFO8"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:06.000Z","severityNumber":19,"severityText":"DEBUG9"}' + - '{"index":{}}' + - '{"@timestamp":"2024-01-15T10:00:07.000Z","severityNumber":18,"severityText":"TRACE10"}' + +--- +"timechart with limit should use max aggregation for OTHER category": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=opensearch-sql_test_index_otel_logs | timechart limit=1 span=1d max(severityNumber) by severityText + + - match: { total: 2 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "severityText", "type": "string" }, { "name": "max(severityNumber)", "type": "bigint" }] } + # The FATAL4 category has max(severityNumber) = 24 + # The OTHER category should have max(severityNumber) = 23 (not sum of all others which would be 143) + - match: {"datarows": [["2024-01-15 00:00:00", "FATAL4", 24], ["2024-01-15 00:00:00", "OTHER", 23]]} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4632.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4632.yml new file mode 100644 index 00000000000..9fad86ff2e7 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4632.yml @@ -0,0 +1,69 @@ +setup: + - do: + indices.create: + index: events + body: + mappings: + properties: + "@timestamp": + type: date + "host": + type: text + "packets": + type: long + - do: + bulk: + index: events + refresh: true + body: + - '{"index":{}}' + - '{"@timestamp":"2023-01-01T10:00:00Z","host":"server1","packets":60}' + - '{"index":{}}' + - 
'{"@timestamp":"2023-01-01T10:05:00Z","host":"server2","packets":30}' + - '{"index":{}}' + - '{"@timestamp":"2023-01-01T10:10:00Z","host":"server1","packets":60}' + - '{"index":{}}' + - '{"@timestamp":"2023-01-01T10:15:00Z","host":"server2","packets":30}' + - '{"index":{}}' + - '{"@timestamp":"2023-01-01T10:20:00Z","host":"server1","packets":60}' + - '{"index":{}}' + - '{"@timestamp":"2023-01-01T10:25:00Z","host":"server2","packets":30}' + - '{"index":{}}' + - '{"@timestamp":"2023-01-01T10:30:00Z","host":"server1","packets":180}' + - '{"index":{}}' + - '{"@timestamp":"2023-01-01T10:35:00Z","host":"server2","packets":90}' + +--- +"timechart count() by field should not return empty buckets": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=events | timechart span=1m count() by host + + - match: { total: 8 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "host", "type": "string" }, { "name": "count()", "type": "bigint" }] } + # Verify that only non-empty buckets are returned (8 rows instead of 16) + # Each time bucket should only have one host with actual data + - match: {"datarows": [["2023-01-01 10:00:00", "server1", 1], ["2023-01-01 10:05:00", "server2", 1], ["2023-01-01 10:10:00", "server1", 1], ["2023-01-01 10:15:00", "server2", 1], ["2023-01-01 10:20:00", "server1", 1], ["2023-01-01 10:25:00", "server2", 1], ["2023-01-01 10:30:00", "server1", 1], ["2023-01-01 10:35:00", "server2", 1]]} + +--- +"timechart max() by field should not return empty buckets": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=events | timechart span=1m max(packets) by host + + - match: { total: 8 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "host", "type": "string" }, { "name": "max(packets)", "type": "bigint" }] } + # Verify that only non-empty buckets are returned + - match: {"datarows": [["2023-01-01 10:00:00", "server1", 60], ["2023-01-01 10:05:00", "server2", 30], ["2023-01-01 10:10:00", "server1", 60], ["2023-01-01 10:15:00", "server2", 30], ["2023-01-01 10:20:00", "server1", 60], ["2023-01-01 10:25:00", "server2", 30], ["2023-01-01 10:30:00", "server1", 180], ["2023-01-01 10:35:00", "server2", 90]]} diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index fd6eb26fe7a..cc5d3d4aa8d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -299,8 +299,8 @@ reverseCommand ; chartCommand - : CHART chartOptions* statsAggTerm (OVER rowSplit)? (BY columnSplit)? - | CHART chartOptions* statsAggTerm BY rowSplit (COMMA)? columnSplit + : CHART chartOptions* statsAggTerm (OVER rowSplit)? (BY columnSplit)? chartOptions* + | CHART chartOptions* statsAggTerm BY rowSplit (COMMA)? columnSplit chartOptions* ; chartOptions @@ -321,7 +321,7 @@ columnSplit ; timechartCommand - : TIMECHART timechartParameter* statsFunction (BY fieldExpression)? + : TIMECHART timechartParameter* statsAggTerm (BY fieldExpression)? 
timechartParameter* ; timechartParameter diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 4566dc30a27..ed66682a981 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -111,10 +111,10 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Timechart; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Window; +import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.setting.Settings.Key; @@ -761,7 +761,7 @@ private List parseAggTerms( @Override public UnresolvedPlan visitTimechartCommand(OpenSearchPPLParser.TimechartCommandContext ctx) { UnresolvedExpression binExpression = - AstDSL.span(AstDSL.referImplicitTimestampField(), AstDSL.intLiteral(1), SpanUnit.m); + AstDSL.span(AstDSL.implicitTimestampField(), AstDSL.intLiteral(1), SpanUnit.m); Integer limit = 10; Boolean useOther = true; // Process timechart parameters @@ -778,16 +778,26 @@ public UnresolvedPlan visitTimechartCommand(OpenSearchPPLParser.TimechartCommand } } } + UnresolvedExpression aggregateFunction = parseAggTerms(List.of(ctx.statsAggTerm())).getFirst(); - UnresolvedExpression aggregateFunction = internalVisitExpression(ctx.statsFunction()); UnresolvedExpression byField = ctx.fieldExpression() != null ? internalVisitExpression(ctx.fieldExpression()) : null; - - return new Timechart(null, aggregateFunction) - .span(binExpression) - .by(byField) - .limit(limit) - .useOther(useOther); + List arguments = + List.of( + new Argument("limit", AstDSL.intLiteral(limit)), + new Argument("useother", AstDSL.booleanLiteral(useOther))); + binExpression = AstDSL.alias(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP, binExpression); + if (byField != null) { + byField = + AstDSL.alias( + StringUtils.unquoteIdentifier(getTextInQuery(ctx.fieldExpression())), byField); + } + return Chart.builder() + .aggregationFunction(aggregateFunction) + .rowSplit(binExpression) + .columnSplit(byField) + .arguments(arguments) + .build(); } /** Eval command. 
*/ diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index c18d558ffc7..05029de875a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -69,7 +69,6 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsFunctionCallContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StringLiteralContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.TableSourceContext; -import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.TimechartCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.WcFieldExpressionContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor; import org.opensearch.sql.ppl.utils.ArgumentFactory; @@ -588,9 +587,18 @@ private List timestampFunctionArguments( @Override public UnresolvedExpression visitPerFunctionCall(PerFunctionCallContext ctx) { - ParseTree parent = ctx.getParent(); String perFuncName = ctx.perFunction().funcName.getText(); - if (!(parent instanceof TimechartCommandContext)) { + // Walk up the parent tree to find timechart command context + ParseTree current = ctx.getParent(); + boolean foundTimechartContext = false; + while (current != null) { + if (current instanceof OpenSearchPPLParser.TimechartCommandContext) { + foundTimechartContext = true; + break; + } + current = current.getParent(); + } + if (!foundTimechartContext) { throw new SyntaxCheckException( perFuncName + " function can only be used within timechart command"); } @@ -672,7 +680,7 @@ public UnresolvedExpression visitSpanClause(SpanClauseContext ctx) { if (ctx.fieldExpression() != null) { fieldExpression = visit(ctx.fieldExpression()); } else { - fieldExpression = AstDSL.referImplicitTimestampField(); + fieldExpression = AstDSL.implicitTimestampField(); } Literal literal = (Literal) visit(ctx.value); return AstDSL.spanFromSpanLengthLiteral(fieldExpression, literal); @@ -978,7 +986,7 @@ public UnresolvedExpression visitTimechartParameter( // Convert span=1h to span(@timestamp, 1h) Literal spanLiteral = (Literal) visit(ctx.spanLiteral()); timechartParameter = - AstDSL.spanFromSpanLengthLiteral(AstDSL.referImplicitTimestampField(), spanLiteral); + AstDSL.spanFromSpanLengthLiteral(AstDSL.implicitTimestampField(), spanLiteral); } else if (ctx.LIMIT() != null) { Literal limit = (Literal) visit(ctx.integerLiteral()); if ((Integer) limit.getValue() < 0) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index b3b91d11b5d..7e8dc16f4dd 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -90,7 +90,6 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; -import org.opensearch.sql.ast.tree.Timechart; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -501,43 +500,14 @@ public String visitReverse(Reverse node, String context) { return StringUtils.format("%s | reverse", child); } - @Override - public String visitTimechart(Timechart node, String context) { - String child = 
node.getChild().get(0).accept(this, context); - StringBuilder timechartCommand = new StringBuilder(); - timechartCommand.append(" | timechart"); - - // Add span if present - if (node.getBinExpression() != null) { - timechartCommand.append(" span=").append(visitExpression(node.getBinExpression())); - } - - // Add limit if present - if (node.getLimit() != null) { - timechartCommand.append(" limit=").append(node.getLimit()); - } - - // Add useother if present - if (node.getUseOther() != null) { - timechartCommand.append(" useother=").append(node.getUseOther()); - } - - // Add aggregation function - timechartCommand.append(" ").append(visitExpression(node.getAggregateFunction())); - - // Add by clause if present - if (node.getByField() != null) { - timechartCommand.append(" by ").append(visitExpression(node.getByField())); - } - - return StringUtils.format("%s%s", child, timechartCommand.toString()); - } - @Override public String visitChart(Chart node, String context) { String child = node.getChild().get(0).accept(this, context); StringBuilder chartCommand = new StringBuilder(); - chartCommand.append(" | chart"); + + // Check if this is a timechart by looking for timestamp span in rowSplit + boolean isTimechart = isTimechartNode(node); + chartCommand.append(isTimechart ? " | timechart" : " | chart"); for (Argument arg : node.getArguments()) { String argName = arg.getArgName(); @@ -569,6 +539,20 @@ public String visitChart(Chart node, String context) { return StringUtils.format("%s%s", child, chartCommand.toString()); } + private boolean isTimechartNode(Chart node) { + // A Chart node represents a timechart if it has a rowSplit that's an alias containing + // a span on the implicit timestamp field + if (node.getRowSplit() instanceof Alias) { + Alias alias = (Alias) node.getRowSplit(); + if (alias.getDelegated() instanceof Span) { + Span span = (Span) alias.getDelegated(); + return span.getField() instanceof Field + && "@timestamp".equals(((Field) span.getField()).getField().toString()); + } + } + return false; + } + public String visitRex(Rex node, String context) { String child = node.getChild().get(0).accept(this, context); String field = visitExpression(node.getField()); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java index c3ed1ebceea..33a9b84d23b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java @@ -75,8 +75,9 @@ public void testTimechartBasic() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, COUNT(*) `count`\n" + "SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + + "WHERE `@timestamp` IS NOT NULL\n" + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" + "ORDER BY 1 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -92,8 +93,9 @@ public void testTimechartPerSecond() { + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " `per_second(cpu_usage)`\n" + "FROM `scott`.`events`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" - + "ORDER BY 1 NULLS LAST) `t2`"); + + "ORDER BY 1 NULLS LAST) `t3`"); } @Test @@ -106,8 +108,9 @@ public void testTimechartPerMinute() { + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " 
`per_minute(cpu_usage)`\n" + "FROM `scott`.`events`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" - + "ORDER BY 1 NULLS LAST) `t2`"); + + "ORDER BY 1 NULLS LAST) `t3`"); } @Test @@ -120,8 +123,9 @@ public void testTimechartPerHour() { + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " `per_hour(cpu_usage)`\n" + "FROM `scott`.`events`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" - + "ORDER BY 1 NULLS LAST) `t2`"); + + "ORDER BY 1 NULLS LAST) `t3`"); } @Test @@ -134,8 +138,9 @@ public void testTimechartPerDay() { + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " `per_day(cpu_usage)`\n" + "FROM `scott`.`events`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" - + "ORDER BY 1 NULLS LAST) `t2`"); + + "ORDER BY 1 NULLS LAST) `t3`"); } @Test @@ -144,8 +149,9 @@ public void testTimechartWithSpan() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, COUNT(*) `count`\n" + "SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + + "WHERE `@timestamp` IS NOT NULL\n" + "GROUP BY `SPAN`(`@timestamp`, 1, 'h')\n" + "ORDER BY 1 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -157,49 +163,24 @@ public void testTimechartWithLimit() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `@timestamp`, `host`, SUM(`actual_count`) `count`\n" - + "FROM (SELECT CAST(`t1`.`@timestamp` AS TIMESTAMP) `@timestamp`, CASE WHEN" - + " `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN" - + " NULL ELSE 'OTHER' END END `host`, SUM(`t1`.`$f2_0`) `actual_count`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n" + "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END `host`," + + " SUM(`t2`.`count()`) `count()`\n" + + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t1`\n" - + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n" + + "WHERE `@timestamp` IS NOT NULL\n" + + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t2`\n" + + "LEFT JOIN (SELECT `host`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER (ORDER" + + " BY SUM(`count()`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t4`\n" + + "WHERE `@timestamp` IS NOT NULL\n" + + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t6`\n" + "WHERE `host` IS NOT NULL\n" - + "GROUP BY `host`\n" - + "ORDER BY 2 DESC NULLS FIRST\n" - + "LIMIT 3) `t7` ON `t1`.`host` IS NOT DISTINCT FROM `t7`.`host`\n" - + "GROUP BY CAST(`t1`.`@timestamp` AS TIMESTAMP), CASE WHEN `t7`.`host` IS NOT NULL" - + " THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END" - + " END\n" - + "UNION\n" - + "SELECT CAST(`t13`.`@timestamp` AS TIMESTAMP) `@timestamp`, `t24`.`$f0` `host`, 0" - + " `count`\n" - + "FROM (SELECT `@timestamp`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`\n" - + "FROM 
`scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t12`\n" - + "GROUP BY `@timestamp`) `t13`\n" - + "CROSS JOIN (SELECT CASE WHEN `t22`.`host` IS NOT NULL THEN `t16`.`host` ELSE CASE" - + " WHEN `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `$f0`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n" - + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t16`\n" - + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `$f2_0`\n" - + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t19`\n" - + "WHERE `host` IS NOT NULL\n" - + "GROUP BY `host`\n" - + "ORDER BY 2 DESC NULLS FIRST\n" - + "LIMIT 3) `t22` ON `t16`.`host` IS NOT DISTINCT FROM `t22`.`host`\n" - + "GROUP BY CASE WHEN `t22`.`host` IS NOT NULL THEN `t16`.`host` ELSE CASE WHEN" - + " `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END) `t24`) `t26`\n" - + "GROUP BY `@timestamp`, `host`\n" - + "ORDER BY `@timestamp` NULLS LAST, `host` NULLS LAST"; + + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n" + + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -208,49 +189,24 @@ public void testTimechartWithSpan1h() { String ppl = "source=events | timechart span=1h count() by host"; RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `@timestamp`, `host`, SUM(`actual_count`) `count`\n" - + "FROM (SELECT CAST(`t1`.`@timestamp` AS TIMESTAMP) `@timestamp`, CASE WHEN" - + " `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN" - + " NULL ELSE 'OTHER' END END `host`, SUM(`t1`.`$f2_0`) `actual_count`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `$f2_0`\n" - + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t1`\n" - + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `$f2_0`\n" - + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t4`\n" - + "WHERE `host` IS NOT NULL\n" - + "GROUP BY `host`\n" - + "ORDER BY 2 DESC NULLS FIRST\n" - + "LIMIT 10) `t7` ON `t1`.`host` IS NOT DISTINCT FROM `t7`.`host`\n" - + "GROUP BY CAST(`t1`.`@timestamp` AS TIMESTAMP), CASE WHEN `t7`.`host` IS NOT NULL" - + " THEN `t1`.`host` ELSE CASE WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END" - + " END\n" - + "UNION\n" - + "SELECT CAST(`t13`.`@timestamp` AS TIMESTAMP) `@timestamp`, `t24`.`$f0` `host`, 0" - + " `count`\n" - + "FROM (SELECT `@timestamp`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`\n" + "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`host` ELSE 'OTHER' END `host`," + + " SUM(`t2`.`count()`) `count()`\n" + + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t12`\n" - + "GROUP BY `@timestamp`) `t13`\n" - + "CROSS JOIN (SELECT CASE WHEN `t22`.`host` IS NOT NULL THEN `t16`.`host` ELSE CASE" - + " WHEN `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `$f0`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, 
`host`, COUNT(*) `$f2_0`\n" + + "WHERE `@timestamp` IS NOT NULL\n" + + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t2`\n" + + "LEFT JOIN (SELECT `host`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER (ORDER" + + " BY SUM(`count()`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t16`\n" - + "LEFT JOIN (SELECT `host`, SUM(`$f2_0`) `grand_total`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `$f2_0`\n" - + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t19`\n" + + "WHERE `@timestamp` IS NOT NULL\n" + + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t6`\n" + "WHERE `host` IS NOT NULL\n" - + "GROUP BY `host`\n" - + "ORDER BY 2 DESC NULLS FIRST\n" - + "LIMIT 10) `t22` ON `t16`.`host` IS NOT DISTINCT FROM `t22`.`host`\n" - + "GROUP BY CASE WHEN `t22`.`host` IS NOT NULL THEN `t16`.`host` ELSE CASE WHEN" - + " `t16`.`host` IS NULL THEN NULL ELSE 'OTHER' END END) `t24`) `t26`\n" - + "GROUP BY `@timestamp`, `host`\n" - + "ORDER BY `@timestamp` NULLS LAST, `host` NULLS LAST"; + + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n" + + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`host` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -259,25 +215,25 @@ public void testTimechartWithSpan1m() { String ppl = "source=events | timechart span=1m avg(cpu_usage) by region"; RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`region` IS NOT NULL THEN `t1`.`region` ELSE CASE" - + " WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END `region`, AVG(`t1`.`$f2`)" - + " `avg(cpu_usage)`\n" + "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`region` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`region` ELSE 'OTHER' END `region`," + + " AVG(`t2`.`avg(cpu_usage)`) `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" - + " `$f2`\n" + + " `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t1`\n" - + "LEFT JOIN (SELECT `region`, AVG(`$f2`) `grand_total`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" - + " `$f2`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t2`\n" + + "LEFT JOIN (SELECT `region`, SUM(`avg(cpu_usage)`) `__grand_total__`, ROW_NUMBER()" + + " OVER (ORDER BY SUM(`avg(cpu_usage)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `region`, AVG(`cpu_usage`) `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t4`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t6`\n" + "WHERE `region` IS NOT NULL\n" - + "GROUP BY `region`\n" - + "ORDER BY 2 DESC NULLS FIRST\n" - + "LIMIT 10) `t7` ON `t1`.`region` = `t7`.`region`\n" - + "GROUP BY `t1`.`@timestamp`, CASE WHEN `t7`.`region` IS NOT NULL THEN `t1`.`region`" - + " ELSE CASE WHEN `t1`.`region` IS NULL THEN NULL ELSE 'OTHER' END END\n" - + "ORDER BY `t1`.`@timestamp` NULLS LAST, 2 NULLS LAST"; + + "GROUP BY `region`) `t9` ON `t2`.`region` = `t9`.`region`\n" + + "GROUP BY `t2`.`@timestamp`, CASE WHEN 
`t2`.`region` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`region` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -296,27 +252,26 @@ public void testTimechartWithLimitAndUseOtherFalse() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `t1`.`@timestamp`, CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE" - + " WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END `host`, AVG(`t1`.`$f2`)" - + " `avg(cpu_usage)`\n" + "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END `host`," + + " AVG(`t2`.`avg(cpu_usage)`) `avg(cpu_usage)`\n" + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" - + " `$f2`\n" + + " `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t1`\n" - + "LEFT JOIN (SELECT `host`, AVG(`$f2`) `grand_total`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" - + " `$f2`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t2`\n" + + "LEFT JOIN (SELECT `host`, SUM(`avg(cpu_usage)`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY SUM(`avg(cpu_usage)`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `host`, AVG(`cpu_usage`) `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t4`\n" + + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" + + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t6`\n" + "WHERE `host` IS NOT NULL\n" - + "GROUP BY `host`\n" - + "ORDER BY 2 DESC NULLS FIRST\n" - + "LIMIT 3) `t7` ON `t1`.`host` = `t7`.`host`\n" - + "GROUP BY `t1`.`@timestamp`, CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE" - + " CASE WHEN `t1`.`host` IS NULL THEN NULL ELSE 'OTHER' END END\n" - + "HAVING CASE WHEN `t7`.`host` IS NOT NULL THEN `t1`.`host` ELSE CASE WHEN `t1`.`host`" - + " IS NULL THEN NULL ELSE 'OTHER' END END <> 'OTHER'\n" - + "ORDER BY `t1`.`@timestamp` NULLS LAST, 2 NULLS LAST"; + + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n" + + "WHERE `t9`.`_row_number_chart_` <= 3\n" + + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`@timestamp` NULLS LAST, 2 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 8cc207b6561..aea0a6e289d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -80,7 +80,6 @@ import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; -import org.opensearch.sql.ast.tree.Timechart; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.setting.Settings.Key; @@ -1244,10 +1243,17 @@ public void testTimechartWithPerSecondFunction() { assertEqual( "source=t | timechart per_second(a)", eval( - new Timechart(relation("t"), alias("per_second(a)", aggregate("sum", field("a")))) - .span(span(field("@timestamp"), 
intLiteral(1), SpanUnit.of("m"))) - .limit(10) - .useOther(true), + Chart.builder() + .child(relation("t")) + .rowSplit( + alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) + .columnSplit(null) + .aggregationFunction(alias("per_second(a)", aggregate("sum", field("a")))) + .arguments( + exprList( + argument("limit", intLiteral(10)), + argument("useother", booleanLiteral(true)))) + .build(), let( field("per_second(a)"), function( @@ -1269,10 +1275,17 @@ public void testTimechartWithPerMinuteFunction() { assertEqual( "source=t | timechart per_minute(a)", eval( - new Timechart(relation("t"), alias("per_minute(a)", aggregate("sum", field("a")))) - .span(span(field("@timestamp"), intLiteral(1), SpanUnit.of("m"))) - .limit(10) - .useOther(true), + Chart.builder() + .child(relation("t")) + .rowSplit( + alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) + .columnSplit(null) + .aggregationFunction(alias("per_minute(a)", aggregate("sum", field("a")))) + .arguments( + exprList( + argument("limit", intLiteral(10)), + argument("useother", booleanLiteral(true)))) + .build(), let( field("per_minute(a)"), function( @@ -1294,10 +1307,17 @@ public void testTimechartWithPerHourFunction() { assertEqual( "source=t | timechart per_hour(a)", eval( - new Timechart(relation("t"), alias("per_hour(a)", aggregate("sum", field("a")))) - .span(span(field("@timestamp"), intLiteral(1), SpanUnit.of("m"))) - .limit(10) - .useOther(true), + Chart.builder() + .child(relation("t")) + .rowSplit( + alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) + .columnSplit(null) + .aggregationFunction(alias("per_hour(a)", aggregate("sum", field("a")))) + .arguments( + exprList( + argument("limit", intLiteral(10)), + argument("useother", booleanLiteral(true)))) + .build(), let( field("per_hour(a)"), function( @@ -1319,10 +1339,17 @@ public void testTimechartWithPerDayFunction() { assertEqual( "source=t | timechart per_day(a)", eval( - new Timechart(relation("t"), alias("per_day(a)", aggregate("sum", field("a")))) - .span(span(field("@timestamp"), intLiteral(1), SpanUnit.of("m"))) - .limit(10) - .useOther(true), + Chart.builder() + .child(relation("t")) + .rowSplit( + alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) + .columnSplit(null) + .aggregationFunction(alias("per_day(a)", aggregate("sum", field("a")))) + .arguments( + exprList( + argument("limit", intLiteral(10)), + argument("useother", booleanLiteral(true)))) + .build(), let( field("per_day(a)"), function( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 5d10960ea6b..c5accc7f7ca 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -63,7 +63,7 @@ import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.RelevanceFieldList; import org.opensearch.sql.ast.expression.SpanUnit; -import org.opensearch.sql.ast.tree.Timechart; +import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.common.antlr.SyntaxCheckException; @@ -1395,16 +1395,19 @@ public void testTimeModifierEarliestWithStringValue() { public void testTimechartSpanParameter() { assertEqual( "source=t | timechart span=30m count()", - Timechart.builder() + 
Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), - intLiteral(30), - SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(30), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); } @@ -1412,14 +1415,19 @@ public void testTimechartSpanParameter() { public void testTimechartLimitParameter() { assertEqual( "source=t | timechart limit=100 count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(100) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(100)), argument("useother", booleanLiteral(true)))) .build()); } @@ -1434,26 +1442,36 @@ public void testTimechartNegativeLimitParameter() { public void testTimechartUseOtherWithBooleanLiteral() { assertEqual( "source=t | timechart useother=true count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); assertEqual( "source=t | timechart useother=false count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(false) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(false)))) .build()); } @@ -1461,50 +1479,70 @@ public void testTimechartUseOtherWithBooleanLiteral() { public void testTimechartUseOtherWithIdentifier() { assertEqual( "source=t | timechart useother=t count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); assertEqual( "source=t | timechart 
useother=f count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(false) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(false)))) .build()); assertEqual( "source=t | timechart useother=TRUE count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); assertEqual( "source=t | timechart useother=FALSE count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(false) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(false)))) .build()); } @@ -1568,42 +1606,55 @@ public void testVisitSpanLiteral() { // Test span literal with integer value and hour unit assertEqual( "source=t | timechart span=1h count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(1), SpanUnit.H)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(1), + SpanUnit.H))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); // Test span literal with decimal value and minute unit assertEqual( "source=t | timechart span=2m count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), intLiteral(2), SpanUnit.m)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(2), + SpanUnit.m))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); // Test span literal without unit (should use NONE unit) assertEqual( "source=t | timechart span=10 count()", - Timechart.builder() + Chart.builder() .child(relation("t")) - .binExpression( - span( - 
field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), - intLiteral(10), - SpanUnit.NONE)) - .aggregateFunction(aggregate("count", allFields())) - .limit(10) - .useOther(true) + .rowSplit( + alias( + "@timestamp", + span( + field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP), + intLiteral(10), + SpanUnit.NONE))) + .aggregationFunction(alias("count()", aggregate("count", allFields()))) + .arguments( + exprList( + argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index be8cffffb53..1a150439980 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -255,7 +255,7 @@ public void testReverseCommand() { @Test public void testTimechartCommand() { assertEquals( - "source=table | timechart span=span(identifier, *** m) limit=10 useother=true count() by" + "source=table | timechart limit=*** useother=*** count() by span(identifier, *** m)" + " identifier", anonymize("source=t | timechart count() by host")); } From 31a74a4c4b56d57a58200588eedd295848d74103 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 18 Nov 2025 23:12:44 -0800 Subject: [PATCH 45/99] added IT , add null cases, fixed format for integer/big integer Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 65 ++++++++++- docs/user/ppl/functions/conversion.rst | 17 ++- .../sql/ppl/ConversionFunctionIT.java | 110 ++++++++++++++++++ .../CalcitePPLToNumberFunctionTest.java | 66 +++++++++-- 4 files changed, 243 insertions(+), 15 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 2557121b0c8..bd95986cd27 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -14,8 +14,8 @@ import org.apache.calcite.linq4j.tree.Expression; import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.rex.RexCall; -import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -38,7 +38,65 @@ public ToNumberFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.DOUBLE_FORCE_NULLABLE; + return (opBinding) -> { + // Try to determine if the result will be Long or Double based on the input + int base = 10; + if (opBinding.getOperandCount() > 1) { + + base = opBinding.getOperandLiteralValue(1, Integer.class); + } + + if (opBinding.getOperandCount() > 0 && opBinding.isOperandLiteral(0, false)) { + String literal = opBinding.getOperandLiteralValue(0, String.class); + if (literal != null) { + try { + // Check if it's a decimal number + if (base != 10) { + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding.getTypeFactory().createSqlType(SqlTypeName.BIGINT), true); + } + if 
(literal.contains(".")) { + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding + .getTypeFactory() + .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), + true); + } else { + // Check if it's an integer that fits in Long + Long.parseLong(literal); + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding + .getTypeFactory() + .createSqlType(org.apache.calcite.sql.type.SqlTypeName.BIGINT), + true); + } + } catch (NumberFormatException e) { + // If parsing fails, default to Double (matches the runtime behavior) + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding + .getTypeFactory() + .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), + true); + } + } + } + // Default to Double when we can't determine the type at compile time + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding + .getTypeFactory() + .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), + true); + }; } @Override @@ -52,7 +52,65 @@ public static class ToNumberImplementor implements NotNullImplementor { public Expression implement( RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) { Expression fieldValue = translatedOperands.get(0); - int base = 10; if (translatedOperands.size() > 1) { Expression baseExpr = translatedOperands.get(1); return Expressions.call(ToNumberFunction.class, "toNumber", fieldValue, baseExpr); @@ -85,7 +110,6 @@ public static Number toNumber(String numStr, int base) { result = bigInteger.longValue(); } } catch (Exception e) { - + // Return null when parsing fails, matching the function's behavior } return result; } diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index c55e868f2cd..461a7d7fe1b 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -127,7 +127,7 @@ Description The following usage options are available, depending on the parameter types and the number of parameters. -Usage: tonumber(string, [base]) converts the value in first argument to provided base type string in second argument. If second argument is not provided, then it converts to base 10 number representation. +Usage: tonumber(string, [base]) converts the string value in the first argument to a number. The second argument describes the base of the first argument. If the second argument is not provided, the value is parsed as a base-10 number.
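As a quick illustration of the semantics above, here is a minimal standalone sketch (an illustrative helper, not the actual UDF source; the class and method names are invented for the example). Base-10 input yields a fractional number only when it contains a decimal point, other bases accept integer digits only, and unparseable input yields null::

    import java.math.BigInteger;

    public final class ToNumberSketch {
      // Hypothetical helper mirroring the documented tonumber() behavior.
      static Number toNumber(String s, int base) {
        try {
          if (base == 10) {
            // A base-10 string with a fraction parses as a double, otherwise as a long (BIGINT).
            if (s.contains(".")) {
              return Double.valueOf(s);
            }
            return Long.valueOf(s);
          }
          // Other bases accept integer digits only; longValue() wraps on overflow,
          // which is why 'FFFFFFFFFFFFFFFF' in base 16 comes back as -1.
          return new BigInteger(s, base).longValue();
        } catch (NumberFormatException e) {
          return null; // unparseable input, e.g. '1010.11' in base 2
        }
      }

      public static void main(String[] args) {
        System.out.println(toNumber("10101", 2));     // 21
        System.out.println(toNumber("FA34", 16));     // 64052
        System.out.println(toNumber("4598.678", 10)); // 4598.678
        System.out.println(toNumber("1010.11", 2));   // null
      }
    }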
Return type: Number @@ -145,7 +145,7 @@ Following example converts a string in binary to the number representation:: +-----------+ | int_value | |-----------| - | 21.0 | + | 21 | +-----------+ @@ -156,7 +156,7 @@ Following example converts a string in hex to the number representation:: +-----------+ | int_value | |-----------| - | 64052.0 | + | 64052 | +-----------+ Following example converts a string in decimal to the number representation:: @@ -166,9 +166,18 @@ Following example converts a string in decimal to the number representation:: +-----------+ | int_value | |-----------| - | 4598.0 | + | 4598 | +-----------+ +Following example converts a string in decimal with fraction to the number representation:: + + os> source=people | eval double_value = tonumber('4598.678') | fields double_value | head 1 + fetched rows / total rows = 1/1 + +--------------+ + | double_value | + |--------------| + | 4598.678 | + +--------------+ TOSTRING ----------- diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java new file mode 100644 index 00000000000..95ff1a73cf0 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java @@ -0,0 +1,110 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.MatcherUtils.*; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; + +public class ConversionFunctionIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + loadIndex(Index.ACCOUNT); + } + + @Test + public void testDecimal() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s |head 1| eval a = tonumber('4598.678') | fields a", + TEST_INDEX_ACCOUNT)); + + verifySchema(actual, schema("a", "double")); + + verifyDataRows(actual, rows(4598.678)); + } + + @Test + public void testHex() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s |head 1| eval a = tonumber('FF12CA',16) | fields a", + TEST_INDEX_ACCOUNT)); + + verifySchema(actual, schema("a", "bigint")); + + verifyDataRows(actual, rows(16716490)); + } + + @Test + public void testBinary() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s |head 1| eval a = tonumber('0110111',2) | fields a", + TEST_INDEX_ACCOUNT)); + + verifySchema(actual, schema("a", "bigint")); + + verifyDataRows(actual, rows(55)); + } + + @Test + public void testOctal() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s |head 1| eval a = tonumber('20415442',8) | fields a", + TEST_INDEX_ACCOUNT)); + + verifySchema(actual, schema("a", "bigint")); + + verifyDataRows(actual, rows(4332322)); + } + + @Test + public void testOctalWithUnsupportedValue() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s |head 1| eval a = tonumber('20415.442',8) | fields a", + TEST_INDEX_ACCOUNT)); + + verifySchema(actual, schema("a", "bigint")); + + assertEquals(actual.getJSONArray("datarows").getJSONArray(0).get(0), null); + } + + @Test + public void testBinaryWithUnsupportedValue() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s |head 1| eval a = tonumber('1010.11',2) | fields a", + TEST_INDEX_ACCOUNT)); + + verifySchema(actual, 
schema("a", "bigint")); + + assertEquals(actual.getJSONArray("datarows").getJSONArray(0).get(0), null); + } + + @Test + public void testHexWithUnsupportedValue() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s |head 1| eval a = tonumber('A.B',16) | fields a", TEST_INDEX_ACCOUNT)); + + verifySchema(actual, schema("a", "bigint")); + + assertEquals(actual.getJSONArray("datarows").getJSONArray(0).get(0), null); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java index bd4f2c293b6..08a4cd7f9a8 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -24,7 +24,7 @@ public void testNumberBinary() { + " LogicalProject(int_value=[TONUMBER('010101':VARCHAR, 2)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "int_value=21.0\n"; + String expectedResult = "int_value=21\n"; verifyResult(root, expectedResult); String expectedSparkSql = @@ -32,6 +32,23 @@ public void testNumberBinary() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testNumberBinaryUnsupportedResultNull() { + String ppl = "source=EMP | eval int_value = tonumber('010.101',2) | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(int_value=[TONUMBER('010.101':VARCHAR, 2)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('010.101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testNumberHex() { String ppl = "source=EMP | eval int_value = tonumber('FA34',16) | fields int_value|head 1"; @@ -41,7 +58,7 @@ public void testNumberHex() { + " LogicalProject(int_value=[TONUMBER('FA34':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "int_value=64052.0\n"; + String expectedResult = "int_value=64052\n"; verifyResult(root, expectedResult); String expectedSparkSql = @@ -49,6 +66,24 @@ public void testNumberHex() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testNumberHexUnsupportedValuesResultNull() { + String ppl = + "source=EMP | eval double_value = tonumber('FA.34',16) | fields double_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(double_value=[TONUMBER('FA.34':VARCHAR, 16)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "double_value=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('FA.34', 16) `double_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testNumberHexMinLimit() { String ppl = @@ -60,7 +95,7 @@ public void testNumberHexMinLimit() { + " LogicalProject(long_value=[TONUMBER('-7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = 
"long_value=-9.223372036854776E18\n"; + String expectedResult = "long_value=-9223372036854775807\n"; verifyResult(root, expectedResult); String expectedSparkSql = @@ -80,7 +115,7 @@ public void testNumberHexMaxLimit() { + " LogicalProject(long_value=[TONUMBER('7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "long_value=9.223372036854776E18\n"; + String expectedResult = "long_value=9223372036854775807\n"; verifyResult(root, expectedResult); String expectedSparkSql = @@ -100,7 +135,7 @@ public void testNumberHexOverNegativeMaxLimit() { + " LogicalProject(long_value=[TONUMBER('-FFFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "long_value=1.0\n"; + String expectedResult = "long_value=1\n"; verifyResult(root, expectedResult); String expectedSparkSql = @@ -119,7 +154,7 @@ public void testNumberHexOverPositiveMaxLimit() { + " LogicalProject(long_value=[TONUMBER('FFFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "long_value=-1.0\n"; + String expectedResult = "long_value=-1\n"; verifyResult(root, expectedResult); String expectedSparkSql = @@ -137,7 +172,7 @@ public void testNumber() { + " LogicalProject(int_value=[TONUMBER('4598':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "int_value=4598.0\n"; + String expectedResult = "int_value=4598\n"; verifyResult(root, expectedResult); String expectedSparkSql = "SELECT `TONUMBER`('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; @@ -160,4 +195,21 @@ public void testNumberDecimal() { "SELECT `TONUMBER`('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testNumberUnsupportedResultNull() { + String ppl = "source=EMP | eval int_value = tonumber('4A598.54922') | fields int_value|head 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(fetch=[1])\n" + + " LogicalProject(int_value=[TONUMBER('4A598.54922':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "int_value=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `TONUMBER`('4A598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } From a42d6aa55165a8a169e2c3af64ad06f2e87b14b8 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 18 Nov 2025 23:41:56 -0800 Subject: [PATCH 46/99] added IT , add null cases, fixed format for integer/big integer Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 48 +++++-------------- .../function/udf/ToNumberFunctionTest.java | 6 --- 2 files changed, 11 insertions(+), 43 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index bd95986cd27..8f2b5e5acab 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -41,50 +41,24 @@ public SqlReturnTypeInference getReturnTypeInference() { return (opBinding) -> { // Try to determine if the result will be Long or 
Double based on the input int base = 10; - if (opBinding.getOperandCount() > 1) { - - base = opBinding.getOperandLiteralValue(1, Integer.class); + try { + base = + opBinding.getOperandCount() > 1 + ? opBinding.getOperandLiteralValue(1, Integer.class) + : 10; + } catch (NumberFormatException e) { + // If parsing fails, default to base 10 } - if (opBinding.getOperandCount() > 0 && opBinding.isOperandLiteral(0, false)) { String literal = opBinding.getOperandLiteralValue(0, String.class); if (literal != null) { - try { - // Check if it's a decimal number - if (base != 10) { - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding.getTypeFactory().createSqlType(SqlTypeName.BIGINT), true); - } - if (literal.contains(".")) { - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding - .getTypeFactory() - .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), - true); - } else { - // Check if it's an integer that fits in Long - Long.parseLong(literal); - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding - .getTypeFactory() - .createSqlType(org.apache.calcite.sql.type.SqlTypeName.BIGINT), - true); - } - } catch (NumberFormatException e) { - // If parsing fails, default to Double (matches the runtime behavior) + + // Check if it's a decimal number + if (base != 10 || !(literal.contains("."))) { return opBinding .getTypeFactory() .createTypeWithNullability( - opBinding - .getTypeFactory() - .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), - true); + opBinding.getTypeFactory().createSqlType(SqlTypeName.BIGINT), true); } } } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java index 34ed102ec5b..b941f8268e5 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -7,7 +7,6 @@ import static org.junit.jupiter.api.Assertions.*; -import org.apache.calcite.sql.type.ReturnTypes; import org.junit.jupiter.api.Test; import org.opensearch.sql.calcite.utils.PPLOperandTypes; @@ -15,11 +14,6 @@ public class ToNumberFunctionTest { private final ToNumberFunction function = new ToNumberFunction(); - @Test - void testGetReturnTypeInference() { - assertEquals(ReturnTypes.DOUBLE_FORCE_NULLABLE, function.getReturnTypeInference()); - } - @Test void testGetOperandMetadata() { assertEquals(PPLOperandTypes.STRING_OR_STRING_INTEGER, function.getOperandMetadata()); From 1ecbbbbd975395f8d6797bbe1cea39f67688d135 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 18 Nov 2025 23:43:10 -0800 Subject: [PATCH 47/99] added IT , add null cases, fixed format for integer/big integer Signed-off-by: Asif Bashar --- .../sql/expression/function/udf/ToNumberFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 8f2b5e5acab..4a20c679e09 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -62,7 +62,7 @@ public SqlReturnTypeInference getReturnTypeInference() { } } } - // Default to Double when we can't determine the type at compile time + 
// Default to Double when we can't determine the type at compile time or bigint type is confirmed return opBinding .getTypeFactory() .createTypeWithNullability( From 188a28ec223f5010ab6369c076cbd8ae53b88542 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 20 Nov 2025 10:08:07 -0800 Subject: [PATCH 48/99] spotless apply Signed-off-by: Asif Bashar --- .../sql/expression/function/udf/ToNumberFunction.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 4a20c679e09..d1604d53bd3 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -62,7 +62,8 @@ public SqlReturnTypeInference getReturnTypeInference() { } } } - // Default to Double when we can't determine the type at compile time or bigint type is confirmed + // Default to Double when we can't determine the type at compile time or bigint type is + // confirmed return opBinding .getTypeFactory() .createTypeWithNullability( From c26261b4bbaddb5c4fb9e6760ccab1e772186d50 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Thu, 20 Nov 2025 12:30:34 -0800 Subject: [PATCH 49/99] spotless apply Signed-off-by: Asif Bashar --- .../CalcitePPLToNumberFunctionTest.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java index 08a4cd7f9a8..c30590fa268 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -28,7 +28,7 @@ public void testNumberBinary() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -45,7 +45,7 @@ public void testNumberBinaryUnsupportedResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('010.101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('010.101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -62,7 +62,7 @@ public void testNumberHex() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -80,7 +80,7 @@ public void testNumberHexUnsupportedValuesResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('FA.34', 16) `double_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('FA.34', 16) `double_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -99,7 +99,7 @@ public void testNumberHexMinLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, 
expectedSparkSql); } @@ -119,7 +119,7 @@ public void testNumberHexMaxLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -139,7 +139,7 @@ public void testNumberHexOverNegativeMaxLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('-FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('-FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -158,7 +158,7 @@ public void testNumberHexOverPositiveMaxLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -175,7 +175,7 @@ public void testNumber() { String expectedResult = "int_value=4598\n"; verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT `TONUMBER`('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + String expectedSparkSql = "SELECT TONUMBER('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -192,7 +192,7 @@ public void testNumberDecimal() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -209,7 +209,7 @@ public void testNumberUnsupportedResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('4A598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('4A598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } } From f30488aaf95afbd31f5b9f3fecefb277e9aae3c7 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 24 Nov 2025 19:14:35 -0600 Subject: [PATCH 50/99] fix javadoc Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index d1604d53bd3..dd6de40bdba 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -21,13 +21,23 @@ import org.opensearch.sql.expression.function.UDFOperandMetadata; /** - * A custom implementation of number/boolean to string . + * The following usage options are available, depending on the parameter types + * and the number of parameters. * - *

This operator is necessary because tostring has following requirements "binary" Converts a - * number to a binary value. "hex" Converts the number to a hexadecimal value. "commas" Formats the - * number with commas. If the number includes a decimal, the function rounds the number to nearest - * two decimal places. "duration" Converts the value in seconds to the readable time format - * HH:MM:SS. if not format parameter provided, then consider value as boolean + *

Usage: {@code tonumber(string, [base])} converts the value in the first + * argument. The second argument describes the base of the first argument. + * If the second argument is not provided, the value is converted using base 10. + * + *

Return type: Number + * + *

You can use this function with the eval commands and as part of eval expressions. + * + *

Base values can range from 2 to 36. + * The maximum value supported for base 10 is {@code +(2 − 2^-52) · 2^1023} + * and the minimum is {@code −(2 − 2^-52) · 2^1023}. + * + *

The maximum for other supported bases is {@code 2^63 − 1} (or {@code 7FFFFFFFFFFFFFFF}) + * and the minimum is {@code -2^63} (or {@code -7FFFFFFFFFFFFFFF}). */ public class ToNumberFunction extends ImplementorUDF { public ToNumberFunction() { From 1ae5665191a8fdc2348306ae2991e3710aae77b4 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 24 Nov 2025 19:33:43 -0600 Subject: [PATCH 51/99] fix doc to add null value return condition details Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 461a7d7fe1b..9f40befd53a 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -135,7 +135,7 @@ Return type: Number You can use this function with the eval commands and as part of eval expressions. Base values can be between 2 and 36. The maximum value supported for base 10 is +(2-2^-52)·2^1023 and minimum is -(2-2^-52)·2^1023. The maximum for other supported bases is 2^63-1 (or 7FFFFFFFFFFFFFFF) and minimum is -2^63 (or -7FFFFFFFFFFFFFFF). - +If the tonumber function cannot parse a field value to a number, the function returns NULL. You can use this function to convert a string representation of a binary number to return the corresponding number in base 10. Following example converts a string in binary to the number representation:: From dde19961e49353fe778d2e3d667a759c1bf3dfc1 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 25 Nov 2025 13:07:54 -0600 Subject: [PATCH 52/99] spotless Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index dd6de40bdba..1c3a836cc84 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -21,23 +21,22 @@ import org.opensearch.sql.expression.function.UDFOperandMetadata; /** - * The following usage options are available, depending on the parameter types - * and the number of parameters. + * The following usage options are available, depending on the parameter types and the number of + * parameters. * - *
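Read together, the javadoc above and the conversion.rst note added in this series pin down the tonumber contract: base-10 input may carry a fraction, any other base (2 through 36) must be a whole number, and input that cannot be parsed yields null rather than an error. A minimal sketch of that contract, where the helper name parseSketch is hypothetical and stands in for the real implementation:

    import java.math.BigInteger;

    final class ToNumberSketch {
      // Hypothetical helper mirroring the documented contract, not the patch's code.
      static Number parseSketch(String s, int base) {
        try {
          if (base == 10) {
            return Double.parseDouble(s);             // base 10 accepts decimals
          }
          return new BigInteger(s, base).longValue(); // bases 2-36: whole numbers only
        } catch (RuntimeException e) {
          return null;                                // unparsable input -> null
        }
      }

      public static void main(String[] args) {
        System.out.println(parseSketch("010101", 2));      // 21
        System.out.println(parseSketch("FA34", 16));       // 64052
        System.out.println(parseSketch("FA.34", 16));      // null (fraction outside base 10)
        System.out.println(parseSketch("4598.54922", 10)); // 4598.54922
      }
    }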

Usage: {@code tonumber(string, [base])} converts the value in the first - * argument. The second argument describes the base of the first argument. - * If the second argument is not provided, the value is converted using base 10. + *

Usage: {@code tonumber(string, [base])} converts the value in the first argument. The + * second argument describes the base of the first argument. If the second argument is not provided, + * the value is converted using base 10. * *

Return type: Number * *

You can use this function with the eval commands and as part of eval expressions. * - *

Base values can range from 2 to 36. - * The maximum value supported for base 10 is {@code +(2 − 2^-52) · 2^1023} - * and the minimum is {@code −(2 − 2^-52) · 2^1023}. + *

Base values can range from 2 to 36. The maximum value supported for base 10 is {@code +(2 − + * 2^-52) · 2^1023} and the minimum is {@code −(2 − 2^-52) · 2^1023}. * - *

The maximum for other supported bases is {@code 2^63 − 1} (or {@code 7FFFFFFFFFFFFFFF}) - * and the minimum is {@code -2^63} (or {@code -7FFFFFFFFFFFFFFF}). + *

The maximum for other supported bases is {@code 2^63 − 1} (or {@code 7FFFFFFFFFFFFFFF}) and + * the minimum is {@code -2^63} (or {@code -7FFFFFFFFFFFFFFF}). */ public class ToNumberFunction extends ImplementorUDF { public ToNumberFunction() { From b7985fedcc5525146368a2757919d199cdd89d89 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 9 Dec 2025 09:36:44 -0800 Subject: [PATCH 53/99] Revert "added IT , add null cases, fixed format for integer/big integer" Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 48 ++++++++++++++----- .../function/udf/ToNumberFunctionTest.java | 6 +++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 1c3a836cc84..0bbc9d70660 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -50,24 +50,50 @@ public SqlReturnTypeInference getReturnTypeInference() { return (opBinding) -> { // Try to determine if the result will be Long or Double based on the input int base = 10; - try { - base = - opBinding.getOperandCount() > 1 - ? opBinding.getOperandLiteralValue(1, Integer.class) - : 10; - } catch (NumberFormatException e) { - // If parsing fails, default to base 10 + if (opBinding.getOperandCount() > 1) { + + base = opBinding.getOperandLiteralValue(1, Integer.class); } + if (opBinding.getOperandCount() > 0 && opBinding.isOperandLiteral(0, false)) { String literal = opBinding.getOperandLiteralValue(0, String.class); if (literal != null) { - - // Check if it's a decimal number - if (base != 10 || !(literal.contains("."))) { + try { + // Check if it's a decimal number + if (base != 10) { + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding.getTypeFactory().createSqlType(SqlTypeName.BIGINT), true); + } + if (literal.contains(".")) { + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding + .getTypeFactory() + .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), + true); + } else { + // Check if it's an integer that fits in Long + Long.parseLong(literal); + return opBinding + .getTypeFactory() + .createTypeWithNullability( + opBinding + .getTypeFactory() + .createSqlType(org.apache.calcite.sql.type.SqlTypeName.BIGINT), + true); + } + } catch (NumberFormatException e) { + // If parsing fails, default to Double (matches the runtime behavior) return opBinding .getTypeFactory() .createTypeWithNullability( - opBinding.getTypeFactory().createSqlType(SqlTypeName.BIGINT), true); + opBinding + .getTypeFactory() + .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), + true); } } } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java index b941f8268e5..34ed102ec5b 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ToNumberFunctionTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.*; +import org.apache.calcite.sql.type.ReturnTypes; import org.junit.jupiter.api.Test; import org.opensearch.sql.calcite.utils.PPLOperandTypes; @@ -14,6 +15,11 @@ public class ToNumberFunctionTest { private final 
ToNumberFunction function = new ToNumberFunction(); + @Test + void testGetReturnTypeInference() { + assertEquals(ReturnTypes.DOUBLE_FORCE_NULLABLE, function.getReturnTypeInference()); + } + @Test void testGetOperandMetadata() { assertEquals(PPLOperandTypes.STRING_OR_STRING_INTEGER, function.getOperandMetadata()); From 740dc90733f830a2ed55030029b6a6e7aa41df1f Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 9 Dec 2025 11:12:07 -0800 Subject: [PATCH 54/99] for all cases, return double Signed-off-by: Asif Bashar --- .../function/udf/ToNumberFunction.java | 65 ++----------------- docs/user/ppl/functions/conversion.rst | 6 +- .../sql/ppl/ConversionFunctionIT.java | 12 ++-- .../CalcitePPLToNumberFunctionTest.java | 36 +++++----- 4 files changed, 31 insertions(+), 88 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java index 0bbc9d70660..bc0a6dffa8d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ToNumberFunction.java @@ -14,8 +14,8 @@ import org.apache.calcite.linq4j.tree.Expression; import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; -import org.apache.calcite.sql.type.SqlTypeName; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -47,66 +47,8 @@ public ToNumberFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return (opBinding) -> { - // Try to determine if the result will be Long or Double based on the input - int base = 10; - if (opBinding.getOperandCount() > 1) { - - base = opBinding.getOperandLiteralValue(1, Integer.class); - } - if (opBinding.getOperandCount() > 0 && opBinding.isOperandLiteral(0, false)) { - String literal = opBinding.getOperandLiteralValue(0, String.class); - if (literal != null) { - try { - // Check if it's a decimal number - if (base != 10) { - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding.getTypeFactory().createSqlType(SqlTypeName.BIGINT), true); - } - if (literal.contains(".")) { - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding - .getTypeFactory() - .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), - true); - } else { - // Check if it's an integer that fits in Long - Long.parseLong(literal); - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding - .getTypeFactory() - .createSqlType(org.apache.calcite.sql.type.SqlTypeName.BIGINT), - true); - } - } catch (NumberFormatException e) { - // If parsing fails, default to Double (matches the runtime behavior) - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding - .getTypeFactory() - .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), - true); - } - } - } - // Default to Double when we can't determine the type at compile time or bigint type is - // confirmed - return opBinding - .getTypeFactory() - .createTypeWithNullability( - opBinding - .getTypeFactory() - .createSqlType(org.apache.calcite.sql.type.SqlTypeName.DOUBLE), - true); - }; + return ReturnTypes.DOUBLE_FORCE_NULLABLE; } @Override @@ -120,6 +62,7 @@ 
public static class ToNumberImplementor implements NotNullImplementor { public Expression implement( RexToLixTranslator translator, RexCall call, List translatedOperands) { Expression fieldValue = translatedOperands.get(0); + int base = 10; if (translatedOperands.size() > 1) { Expression baseExpr = translatedOperands.get(1); return Expressions.call(ToNumberFunction.class, "toNumber", fieldValue, baseExpr); @@ -152,7 +95,7 @@ public static Number toNumber(String numStr, int base) { result = bigInteger.longValue(); } } catch (Exception e) { - // Return null when parsing fails, matches function behavior + } return result; } diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index 9f40befd53a..93eee712d94 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -145,7 +145,7 @@ Following example converts a string in binary to the number representation:: +-----------+ | int_value | |-----------| - | 21 | + | 21.0 | +-----------+ @@ -156,7 +156,7 @@ Following example converts a string in hex to the number representation:: +-----------+ | int_value | |-----------| - | 64052 | + | 64052.0 | +-----------+ Following example converts a string in decimal to the number representation:: @@ -166,7 +166,7 @@ Following example converts a string in decimal to the number representation:: +-----------+ | int_value | |-----------| - | 4598 | + | 4598.0 | +-----------+ Following example converts a string in decimal with fraction to the number representation:: diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java index 95ff1a73cf0..9a870f1e49e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ConversionFunctionIT.java @@ -39,7 +39,7 @@ public void testHex() throws IOException { "source=%s |head 1| eval a = tonumber('FF12CA',16) | fields a", TEST_INDEX_ACCOUNT)); - verifySchema(actual, schema("a", "bigint")); + verifySchema(actual, schema("a", "double")); verifyDataRows(actual, rows(16716490)); } @@ -52,7 +52,7 @@ public void testBinary() throws IOException { "source=%s |head 1| eval a = tonumber('0110111',2) | fields a", TEST_INDEX_ACCOUNT)); - verifySchema(actual, schema("a", "bigint")); + verifySchema(actual, schema("a", "double")); verifyDataRows(actual, rows(55)); } @@ -65,7 +65,7 @@ public void testOctal() throws IOException { "source=%s |head 1| eval a = tonumber('20415442',8) | fields a", TEST_INDEX_ACCOUNT)); - verifySchema(actual, schema("a", "bigint")); + verifySchema(actual, schema("a", "double")); verifyDataRows(actual, rows(4332322)); } @@ -78,7 +78,7 @@ public void testOctalWithUnsupportedValue() throws IOException { "source=%s |head 1| eval a = tonumber('20415.442',8) | fields a", TEST_INDEX_ACCOUNT)); - verifySchema(actual, schema("a", "bigint")); + verifySchema(actual, schema("a", "double")); assertEquals(actual.getJSONArray("datarows").getJSONArray(0).get(0), null); } @@ -91,7 +91,7 @@ public void testBinaryWithUnsupportedValue() throws IOException { "source=%s |head 1| eval a = tonumber('1010.11',2) | fields a", TEST_INDEX_ACCOUNT)); - verifySchema(actual, schema("a", "bigint")); + verifySchema(actual, schema("a", "double")); assertEquals(actual.getJSONArray("datarows").getJSONArray(0).get(0), null); } @@ -103,7 +103,7 @@ public void testHexWithUnsupportedValue() throws IOException { String.format( "source=%s 
|head 1| eval a = tonumber('A.B',16) | fields a", TEST_INDEX_ACCOUNT)); - verifySchema(actual, schema("a", "bigint")); + verifySchema(actual, schema("a", "double")); assertEquals(actual.getJSONArray("datarows").getJSONArray(0).get(0), null); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java index c30590fa268..0639473a72a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -24,11 +24,11 @@ public void testNumberBinary() { + " LogicalProject(int_value=[TONUMBER('010101':VARCHAR, 2)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "int_value=21\n"; + String expectedResult = "int_value=21.0\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -45,7 +45,7 @@ public void testNumberBinaryUnsupportedResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('010.101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('010.101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -58,11 +58,11 @@ public void testNumberHex() { + " LogicalProject(int_value=[TONUMBER('FA34':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "int_value=64052\n"; + String expectedResult = "int_value=64052.0\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -80,7 +80,7 @@ public void testNumberHexUnsupportedValuesResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('FA.34', 16) `double_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('FA.34', 16) `double_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -95,11 +95,11 @@ public void testNumberHexMinLimit() { + " LogicalProject(long_value=[TONUMBER('-7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "long_value=-9223372036854775807\n"; + String expectedResult = "long_value=-9.223372036854776E18\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -115,11 +115,11 @@ public void testNumberHexMaxLimit() { + " LogicalProject(long_value=[TONUMBER('7FFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "long_value=9223372036854775807\n"; + String expectedResult = "long_value=9.223372036854776E18\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM 
`scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -135,11 +135,11 @@ public void testNumberHexOverNegativeMaxLimit() { + " LogicalProject(long_value=[TONUMBER('-FFFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "long_value=1\n"; + String expectedResult = "long_value=1.0\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('-FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('-FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -154,11 +154,11 @@ public void testNumberHexOverPositiveMaxLimit() { + " LogicalProject(long_value=[TONUMBER('FFFFFFFFFFFFFFFF':VARCHAR, 16)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "long_value=-1\n"; + String expectedResult = "long_value=-1.0\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -172,10 +172,10 @@ public void testNumber() { + " LogicalProject(int_value=[TONUMBER('4598':VARCHAR)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedResult = "int_value=4598\n"; + String expectedResult = "int_value=4598.0\n"; verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT TONUMBER('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + String expectedSparkSql = "SELECT `TONUMBER`('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -192,7 +192,7 @@ public void testNumberDecimal() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -209,7 +209,7 @@ public void testNumberUnsupportedResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT TONUMBER('4A598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `TONUMBER`('4A598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } } From aac7a103c7f76770c7c797ef14cb26c4e6e89372 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 9 Dec 2025 11:51:13 -0800 Subject: [PATCH 55/99] unit test Signed-off-by: Asif Bashar --- .../CalcitePPLToNumberFunctionTest.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java index 0639473a72a..f92e94519e2 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLToNumberFunctionTest.java @@ -28,7 +28,7 @@ public void testNumberBinary() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('010101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('010101', 2) `int_value`\nFROM 
`scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -45,7 +45,7 @@ public void testNumberBinaryUnsupportedResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('010.101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('010.101', 2) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -62,7 +62,7 @@ public void testNumberHex() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('FA34', 16) `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -80,7 +80,7 @@ public void testNumberHexUnsupportedValuesResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('FA.34', 16) `double_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('FA.34', 16) `double_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -99,7 +99,7 @@ public void testNumberHexMinLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('-7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -119,7 +119,7 @@ public void testNumberHexMaxLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('7FFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -139,7 +139,7 @@ public void testNumberHexOverNegativeMaxLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('-FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('-FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -158,7 +158,7 @@ public void testNumberHexOverPositiveMaxLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('FFFFFFFFFFFFFFFF', 16) `long_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -175,7 +175,7 @@ public void testNumber() { String expectedResult = "int_value=4598.0\n"; verifyResult(root, expectedResult); - String expectedSparkSql = "SELECT `TONUMBER`('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + String expectedSparkSql = "SELECT TONUMBER('4598') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -192,7 +192,7 @@ public void testNumberDecimal() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('4598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -209,7 +209,7 @@ public void testNumberUnsupportedResultNull() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `TONUMBER`('4A598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT TONUMBER('4A598.54922') `int_value`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } } From f442d873f6b43bb0e1c6f83cf3f2f6b71290babd Mon Sep 17 00:00:00 2001 From: 
Xinyu Hao <75524174+ishaoxy@users.noreply.github.com> Date: Thu, 13 Nov 2025 10:55:09 +0800 Subject: [PATCH 56/99] Fix bug that `Streamstats` command incorrectly treats null as a valid group (#4777) * use null to cover the aggregate value when group value is null Signed-off-by: Xinyu Hao * fix test Signed-off-by: Xinyu Hao * fix some problems Signed-off-by: Xinyu Hao * add IT case Signed-off-by: Xinyu Hao --------- Signed-off-by: Xinyu Hao --- .../sql/calcite/CalciteRelNodeVisitor.java | 50 ++- .../sql/calcite/remote/CalciteExplainIT.java | 2 + .../remote/CalciteStreamstatsCommandIT.java | 372 +++++++++++------- .../explain_streamstats_distinct_count.yaml | 9 +- .../explain_streamstats_earliest_latest.yaml | 9 +- ...reamstats_earliest_latest_custom_time.yaml | 9 +- .../explain_streamstats_distinct_count.yaml | 11 +- .../explain_streamstats_earliest_latest.yaml | 11 +- ...reamstats_earliest_latest_custom_time.yaml | 11 +- .../calcite/CalcitePPLStreamstatsTest.java | 20 +- 10 files changed, 310 insertions(+), 194 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 2b3b1fd88e7..4848415c360 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1703,7 +1703,10 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } - // Default + // Default: first get rawExpr + List overExpressions = + node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList(); + if (hasGroup) { // only build sequence when there is by condition RexNode streamSeq = @@ -1714,21 +1717,54 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(streamSeq); - } - List overExpressions = - node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList(); - context.relBuilder.projectPlus(overExpressions); + // construct groupNotNull predicate + List groupByList = + groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); + List notNullList = + PlanUtils.getSelectColumns(groupByList).stream() + .map(context.relBuilder::field) + .map(context.relBuilder::isNotNull) + .toList(); + RexNode groupNotNull = context.relBuilder.and(notNullList); - // resort when there is by condition - if (hasGroup) { + // wrap each expr: CASE WHEN groupNotNull THEN rawExpr ELSE CAST(NULL AS rawType) END + List wrappedOverExprs = + wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context); + context.relBuilder.projectPlus(wrappedOverExprs); + // resort when there is by condition context.relBuilder.sort(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); + } else { + context.relBuilder.projectPlus(overExpressions); } return context.relBuilder.peek(); } + private List wrapWindowFunctionsWithGroupNotNull( + List overExpressions, RexNode groupNotNull, CalcitePlanContext context) { + List wrappedOverExprs = new ArrayList<>(overExpressions.size()); + for (RexNode overExpr : overExpressions) { + RexNode rawExpr = overExpr; + String aliasName = null; + if (overExpr instanceof RexCall rc && rc.getOperator() == 
SqlStdOperatorTable.AS) { + rawExpr = rc.getOperands().get(0); + if (rc.getOperands().size() >= 2 && rc.getOperands().get(1) instanceof RexLiteral lit) { + aliasName = lit.getValueAs(String.class); + } + } + RexNode nullLiteral = context.rexBuilder.makeNullLiteral(rawExpr.getType()); + RexNode caseExpr = + context.rexBuilder.makeCall(SqlStdOperatorTable.CASE, groupNotNull, rawExpr, nullLiteral); + if (aliasName != null) { + caseExpr = context.relBuilder.alias(caseExpr, aliasName); + } + wrappedOverExprs.add(caseExpr); + } + return wrappedOverExprs; + } + private RelNode buildStreamWindowJoinPlan( CalcitePlanContext context, RelNode leftWithHelpers, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 7a454208826..52fb467f5bc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -745,6 +745,7 @@ public void testExplainOnFirstLast() throws IOException { } // Only for Calcite + @Test public void testExplainOnEventstatsEarliestLatest() throws IOException { String expected = loadExpectedPlan("explain_eventstats_earliest_latest.json"); assertJsonEqualsIgnoreId( @@ -782,6 +783,7 @@ public void testExplainOnEventstatsEarliestLatestNoGroupBy() throws IOException TEST_INDEX_LOGS))); } + @Test public void testExplainOnStreamstatsEarliestLatest() throws IOException { String expected = loadExpectedPlan("explain_streamstats_earliest_latest.yaml"); assertYamlEqualsIgnoreId( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java index ee94c218dbb..ae1fb4f2b1c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java @@ -147,7 +147,7 @@ public void testStreamstatsByWithNull() throws IOException { rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), rows(null, "Canada", null, 4, 2023, 10, 3, 18.333333333333332, 10, 25), - rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); + rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); actual = executeQuery( @@ -161,8 +161,8 @@ public void testStreamstatsByWithNull() throws IOException { rows("Hello", "USA", "New York", 4, 2023, 30, 1, 30, 30, 30), rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 1, 20, 20, 20), - rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), - rows("Kevin", null, null, 4, 2023, null, 2, 10, 10, 10)); + rows(null, "Canada", null, 4, 2023, 10, null, null, null, null), + rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); } @Test @@ -198,7 +198,7 @@ public void testStreamstatsBySpanWithNull() throws IOException { rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), - rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); + rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); } @Test @@ -251,7 +251,7 @@ public void testStreamstatsByMultiplePartitionsWithNull1() throws IOException { 
rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), - rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); + rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); } @Test @@ -269,8 +269,8 @@ public void testStreamstatsByMultiplePartitionsWithNull2() throws IOException { rows("Hello", "USA", "New York", 4, 2023, 30, 1, 30, 30, 30), rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 1, 20, 20, 20), - rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), - rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); + rows(null, "Canada", null, 4, 2023, 10, null, null, null, null), + rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); } @Test @@ -340,6 +340,7 @@ public void testStreamstatsWindowWithNull() throws IOException { rows("Kevin", null, null, 4, 2023, null, 15)); } + @Test public void testStreamstatsBigWindow() throws IOException { JSONObject actual = executeQuery( @@ -412,39 +413,40 @@ public void testStreamstatsGlobal() throws IOException { + " \"Quebec\",\"country\": \"USA\",\"year\": 2023,\"month\":" + " 4}\n"); client().performRequest(insertRequest); - - JSONObject actual = - executeQuery( - String.format( - "source=%s | streamstats window=2 global=false avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY)); - - verifyDataRows( - actual, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 50), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), - rows("Jay", "USA", "Quebec", 4, 2023, 40, 35)); - - JSONObject actual2 = - executeQuery( - String.format( - "source=%s | streamstats window=2 global=true avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY)); - - verifyDataRows( - actual2, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 50), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), - rows("Jay", "USA", "Quebec", 4, 2023, 40, 40)); - - Request deleteRequest = - new Request( - "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY, docId)); - client().performRequest(deleteRequest); + try { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats window=2 global=false avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY)); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), + rows("Jay", "USA", "Quebec", 4, 2023, 40, 35)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats window=2 global=true avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), + rows("Jay", "USA", "Quebec", 4, 2023, 40, 40)); + } finally { + Request deleteRequest = + new Request( + "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY, docId)); + client().performRequest(deleteRequest); + } } @Test @@ -459,44 +461,45 
@@ public void testStreamstatsGlobalWithNull() throws IOException { + " \"Quebec\",\"country\": \"USA\",\"year\": 2023,\"month\":" + " 4}\n"); client().performRequest(insertRequest); - - JSONObject actual = - executeQuery( - String.format( - "source=%s | streamstats window=2 global=false avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY_WITH_NULL)); - - verifyDataRows( - actual, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 50), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), - rows(null, "Canada", null, 4, 2023, 10, 15), - rows("Kevin", null, null, 4, 2023, null, null), - rows("Jay", "USA", "Quebec", 4, 2023, 40, 35)); - - JSONObject actual2 = - executeQuery( - String.format( - "source=%s | streamstats window=2 global=true avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY_WITH_NULL)); - - verifyDataRows( - actual2, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 50), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), - rows(null, "Canada", null, 4, 2023, 10, 15), - rows("Kevin", null, null, 4, 2023, null, null), - rows("Jay", "USA", "Quebec", 4, 2023, 40, 40)); - - Request deleteRequest = - new Request( - "DELETE", - String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); - client().performRequest(deleteRequest); + try { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats window=2 global=false avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), + rows(null, "Canada", null, 4, 2023, 10, 15), + rows("Kevin", null, null, 4, 2023, null, null), + rows("Jay", "USA", "Quebec", 4, 2023, 40, 35)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats window=2 global=true avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), + rows(null, "Canada", null, 4, 2023, 10, 15), + rows("Kevin", null, null, 4, 2023, null, null), + rows("Jay", "USA", "Quebec", 4, 2023, 40, 40)); + } finally { + Request deleteRequest = + new Request( + "DELETE", + String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); + client().performRequest(deleteRequest); + } } @Test @@ -510,39 +513,40 @@ public void testStreamstatsReset() throws IOException { + " \"Quebec\",\"country\": \"USA\",\"year\": 2023,\"month\":" + " 4}\n"); client().performRequest(insertRequest); - - JSONObject actual = - executeQuery( - String.format( - "source=%s | streamstats window=2 reset_before=age>29 avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY)); - - verifyDataRows( - actual, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), - rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); - - JSONObject actual2 = - 
executeQuery( - String.format( - "source=%s | streamstats window=2 reset_after=age>22 avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY)); - - verifyDataRows( - actual2, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), - rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); - - Request deleteRequest = - new Request( - "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY, docId)); - client().performRequest(deleteRequest); + try { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats window=2 reset_before=age>29 avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY)); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), + rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats window=2 reset_after=age>22 avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), + rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); + } finally { + Request deleteRequest = + new Request( + "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY, docId)); + client().performRequest(deleteRequest); + } } @Test @@ -557,44 +561,45 @@ public void testStreamstatsResetWithNull() throws IOException { + " \"Quebec\",\"country\": \"USA\",\"year\": 2023,\"month\":" + " 4}\n"); client().performRequest(insertRequest); - - JSONObject actual = - executeQuery( - String.format( - "source=%s | streamstats window=2 reset_before=age>29 avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY_WITH_NULL)); - - verifyDataRows( - actual, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), - rows(null, "Canada", null, 4, 2023, 10, 15), - rows("Kevin", null, null, 4, 2023, null, null), - rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); - - JSONObject actual2 = - executeQuery( - String.format( - "source=%s | streamstats window=2 reset_after=age>22 avg(age) as avg by country", - TEST_INDEX_STATE_COUNTRY_WITH_NULL)); - - verifyDataRows( - actual2, - rows("Jake", "USA", "California", 4, 2023, 70, 70), - rows("Hello", "USA", "New York", 4, 2023, 30, 30), - rows("John", "Canada", "Ontario", 4, 2023, 25, 25), - rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), - rows(null, "Canada", null, 4, 2023, 10, 15), - rows("Kevin", null, null, 4, 2023, null, null), - rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); - - Request deleteRequest = - new Request( - "DELETE", - String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); - client().performRequest(deleteRequest); + try { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats window=2 reset_before=age>29 avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 
70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 22.5), + rows(null, "Canada", null, 4, 2023, 10, 15), + rows("Kevin", null, null, 4, 2023, null, null), + rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats window=2 reset_after=age>22 avg(age) as avg by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), + rows(null, "Canada", null, 4, 2023, 10, 15), + rows("Kevin", null, null, 4, 2023, null, null), + rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); + } finally { + Request deleteRequest = + new Request( + "DELETE", + String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); + client().performRequest(deleteRequest); + } } @Test @@ -630,7 +635,7 @@ public void testMultipleStreamstats() throws IOException { } @Test - public void testMultipleStreamstatsWithNull() throws IOException { + public void testMultipleStreamstatsWithNull1() throws IOException { JSONObject actual = executeQuery( String.format( @@ -644,10 +649,44 @@ public void testMultipleStreamstatsWithNull() throws IOException { rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), - rows(null, "Canada", null, 4, 2023, 10, 10, 18.333333333333332), + rows(null, "Canada", null, 4, 2023, 10, null, 22.5), rows("Kevin", null, null, 4, 2023, null, null, null)); } + @Test + public void testMultipleStreamstatsWithNull2() throws IOException { + final int docId = 5; + Request insertRequest = + new Request( + "PUT", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY, docId)); + insertRequest.setJsonEntity( + "{\"name\": \"Jay\",\"age\": 28," + + " \"country\": \"USA\",\"year\": 2023,\"month\":" + + " 4}\n"); + client().performRequest(insertRequest); + try { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats avg(age) as avg_age by state, country | streamstats" + + " avg(avg_age) as avg_state_age by country", + TEST_INDEX_STATE_COUNTRY)); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), + rows("Jay", "USA", null, 4, 2023, 28, null, 50)); + } finally { + Request deleteRequest = + new Request( + "DELETE", String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY, docId)); + client().performRequest(deleteRequest); + } + } + @Test public void testStreamstatsAndEventstats() throws IOException { JSONObject actual = @@ -745,6 +784,37 @@ public void testMultipleStreamstatsWithEval() throws IOException { rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 5, 5, 1)); } + @Test + public void testMultipleStreamstatsWithEval2() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval new_state=lower(state), new_country=lower(country) | streamstats" + + " avg(age) as avg_age by new_state, new_country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchemaInOrder( + actual, + 
schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("new_state", "string"), + schema("new_country", "string"), + schema("avg_age", "double")); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, "california", "usa", 70), + rows("Hello", "USA", "New York", 4, 2023, 30, "new york", "usa", 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, "ontario", "canada", 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "quebec", "canada", 20), + rows(null, "Canada", null, 4, 2023, 10, null, "canada", null), + rows("Kevin", null, null, 4, 2023, null, null, null, null)); + } + @Test public void testStreamstatsEmptyRows() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml index 32538ab17df..c9ef1ca9ebd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml @@ -3,13 +3,14 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[CASE(IS NOT NULL($4), DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:BIGINT)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..12=[{inputs}], proj#0..10=[{exprs}], distinct_states=[$t12]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[null:BIGINT], expr#15=[CASE($t12, $t13, $t14)], proj#0..10=[{exprs}], distinct_states=[$t15]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$11], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t4)], proj#0..12=[{exprs}]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml index cac21b929ee..aac0fab3748 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml @@ -3,13 +3,14 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($1), ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)], latest_message=[CASE(IS NOT NULL($1), ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..7=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t6], latest_message=[$t7]) + EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$5], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, 
@timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[IS NOT NULL($t1)], proj#0..6=[{exprs}]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml index f19625d85e5..e86cfb8236c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml @@ -3,13 +3,14 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($4), ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:VARCHAR)], latest_message=[CASE(IS NOT NULL($4), ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..7=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t6], latest_message=[$t7]) + EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$5], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[IS NOT NULL($t4)], proj#0..6=[{exprs}]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml index 550cf0ea9cb..f04a18c5f16 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml @@ -3,13 +3,14 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[CASE(IS NOT NULL($4), DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:BIGINT)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], distinct_states=[$t18]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[null:BIGINT], expr#15=[CASE($t12, $t13, $t14)], proj#0..10=[{exprs}], distinct_states=[$t15]) EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$17], dir0=[ASC]) + EnumerableSort(sort0=[$11], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + EnumerableCalc(expr#0..17=[{inputs}], 
expr#18=[IS NOT NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $12=[$t18]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml index c37fae48771..af7d996dfb8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml @@ -3,13 +3,14 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($1), ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)], latest_message=[CASE(IS NOT NULL($1), ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..13=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t12], latest_message=[$t13]) + EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableSort(sort0=[$5], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t1)], proj#0..4=[{exprs}], __stream_seq__=[$t11], $6=[$t12]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml index b85e4b6b7bb..c5c4eec782f 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml @@ -3,13 +3,14 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($4), ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:VARCHAR)], latest_message=[CASE(IS NOT NULL($4), ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..13=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t12], latest_message=[$t13]) + EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableSort(sort0=[$5], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t4)], proj#0..4=[{exprs}], __stream_seq__=[$t11], $6=[$t12]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 04f4c7610d9..a5853ecba5d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -24,17 +24,18 @@ public void testStreamstatsBy() { + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ROWS UNBOUNDED PRECEDING)])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[CASE(IS NOT" + + " NULL($7), 
MAX($5) OVER (PARTITION BY $7 ROWS UNBOUNDED PRECEDING), null:DECIMAL(7," + + " 2))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)" - + " `max(SAL)`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, CASE WHEN" + + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ROWS BETWEEN" + + " UNBOUNDED PRECEDING AND CURRENT ROW) ELSE NULL END `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -106,16 +107,17 @@ public void testStreamstatsGlobal() { + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ROWS 4 PRECEDING)])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[CASE(IS NOT" + + " NULL($7), MAX($5) OVER (PARTITION BY $7 ROWS 4 PRECEDING), null:DECIMAL(7, 2))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) `max(SAL)`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, CASE WHEN" + + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ROWS BETWEEN 4" + + " PRECEDING AND CURRENT ROW) ELSE NULL END `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" From 62849d511304e35161a2b03958bd8e96378a7806 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 13 Nov 2025 14:55:44 +0800 Subject: [PATCH 57/99] Support using decimal as span literals (#4717) * Support decimal for span used in bin command Signed-off-by: Yuanchun Shen * Support decimal literal in span function Signed-off-by: Yuanchun Shen * Convert interval to double if it is BigDecimal for further calculation Signed-off-by: Yuanchun Shen * Make the error messages for decimal span length more meaningful Signed-off-by: Yuanchun Shen * Add missing weeks to PLURAL_UNIT in ppl lexer Signed-off-by: Yuanchun Shen --------- Signed-off-by: Yuanchun Shen --- .../expression/function/udf/SpanFunction.java | 20 ++++++++-- .../calcite/remote/CalciteBinCommandIT.java | 8 ++++ .../opensearch/sql/ppl/StatsCommandIT.java | 17 ++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 39 ++++++++----------- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 3 ++ .../sql/ppl/parser/AstExpressionBuilder.java | 9 +++++ .../sql/ppl/parser/AstBuilderTest.java | 20 ++++++++++ .../ppl/parser/AstExpressionBuilderTest.java | 13 +++++++ 8 files changed, 103 insertions(+), 26 deletions(-) diff --git 
a/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java index cb5a0501ebb..ed32872c8e2 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java @@ -20,7 +20,6 @@ import org.apache.calcite.schema.impl.ScalarFunctionImpl; import org.apache.calcite.sql.type.CompositeOperandTypeChecker; import org.apache.calcite.sql.type.OperandTypes; -import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeUtil; @@ -44,7 +43,17 @@ public SpanFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.ARG0; + // Return arg0 type if it has a unit (i.e. time related span) + return callBinding -> { + if (SqlTypeUtil.isString(callBinding.getOperandType(2))) { + return callBinding.getOperandType(0); + } + // Use the least restrictive type between the field type and the interval type if it's a + // numeric span. E.g. span(int_field, double_literal) -> double + return callBinding + .getTypeFactory() + .leastRestrictive(List.of(callBinding.getOperandType(0), callBinding.getOperandType(1))); + }; } @Override @@ -56,10 +65,9 @@ public UDFOperandMetadata getOperandMetadata() { .or( OperandTypes.family( SqlTypeFamily.DATETIME, SqlTypeFamily.NUMERIC, SqlTypeFamily.CHARACTER)) - // TODO: numeric span should support decimal as its interval .or( OperandTypes.family( - SqlTypeFamily.NUMERIC, SqlTypeFamily.INTEGER, SqlTypeFamily.ANY))); + SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC, SqlTypeFamily.ANY))); } public static class SpanImplementor implements NotNullImplementor { @@ -72,8 +80,12 @@ public Expression implement( Expression interval = translatedOperands.get(1); RelDataType fieldType = call.getOperands().get(0).getType(); + RelDataType intervalType = call.getOperands().get(1).getType(); RelDataType unitType = call.getOperands().get(2).getType(); + if (SqlTypeUtil.isDecimal(intervalType)) { + interval = Expressions.call(interval, "doubleValue"); + } if (SqlTypeUtil.isNull(unitType)) { return switch (call.getType().getSqlTypeName()) { case BIGINT, INTEGER, SMALLINT, TINYINT -> Expressions.multiply( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index bb326dc39a7..f2589720967 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -1068,4 +1068,12 @@ public void testBinWithEvalCreatedDottedFieldName() throws IOException { rows(false, "go", "opentelemetry", 16, 1, "12-14"), rows(true, "rust", "opentelemetry", 12, 1, "14-16")); } + + @Test + public void testBinWithDecimalSpan() throws IOException { + JSONObject result = + executeQuery("source=events_null | bin cpu_usage span=7.5 | stats count() by cpu_usage"); + verifySchema(result, schema("count()", "bigint"), schema("cpu_usage", "string")); + verifyDataRows(result, rows(3, "37.5-45.0"), rows(2, "45.0-52.5"), rows(1, "52.5-60.0")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java index 
eb6573c27b6..b339f3f8023 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java @@ -1225,4 +1225,21 @@ public void testStatsSortOnMeasureComplex() throws IOException { resetQueryBucketSize(); } } + + @Test + public void testStatsByFractionalSpan() throws IOException { + JSONObject response1 = + executeQuery( + String.format( + "source=%s | stats count by span(balance, 4170.5)", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema(response1, schema("count", "bigint"), schema("span(balance,4170.5)", "double")); + verifyDataRows( + response1, + rows(3, null), + rows(1, 4170.5), + rows(1, 29193.5), + rows(1, 37534.5), + rows(1, 45875.5)); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 8d64bfabb1f..d0bf12ee9e3 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -519,40 +519,35 @@ ALIGNTIME: 'ALIGNTIME'; // Must precede ID to avoid conflicts with identifier matching PERCENTILE_SHORTCUT: PERC(INTEGER_LITERAL | DECIMAL_LITERAL) | 'P'(INTEGER_LITERAL | DECIMAL_LITERAL); -SPANLENGTH: [0-9]+ ( - 'US' |'CS'|'DS' - |'MS'|'MILLISECOND'|'MILLISECONDS' - |'S'|'SEC'|'SECS'|'SECOND'|'SECONDS' - |'MIN'|'MINS'|'MINUTE'|'MINUTES' - |'H'|'HR'|'HRS'|'HOUR'|'HOURS' - |'H'|'HR'|'HRS'|'HOUR'|'HOURS' - |'D'|'DAY'|'DAYS' - |'W'|'WEEK'|'WEEKS' - |'M'|'MON'|'MONTH'|'MONTHS' - |'Q'|'QTR'|'QTRS'|'QUARTER'|'QUARTERS' - |'Y'|'YR'|'YRS'|'YEAR'|'YEARS' -); +fragment DAY_OR_DOUBLE: 'D'; +fragment COMMON_TIME_UNIT: 'S'|'SEC'|'SECOND' + |'M'|'MIN'|'MINUTE' + |'H'|'HR'|'HOUR' + |'DAY'|'W'|'WEEK' + |'MON'|'MONTH' + |'Q'|'QTR'|'QUARTER' + |'Y'|'YR'|'YEAR'; +fragment PLURAL_UNIT: 'MILLISECONDS'|'SECS'|'SECONDS'|'MINS'|'MINUTES'|'HRS'|'HOURS' + |'DAYS'|'WEEKS'|'MONTHS'|'QTRS'|'QUARTERS'|'YRS'|'YEARS'; +fragment SPANUNIT: COMMON_TIME_UNIT | PLURAL_UNIT + |'US'|'CS'|'DS' + |'MS'|'MILLISECOND'; +SPANLENGTH: DEC_DIGIT+ (SPANUNIT | DAY_OR_DOUBLE); +DECIMAL_SPANLENGTH: (DEC_DIGIT+)? '.' DEC_DIGIT+ SPANUNIT; NUMERIC_ID : DEC_DIGIT+ ID_LITERAL; // LITERALS AND VALUES //STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; fragment WEEK_SNAP_UNIT: 'W' [0-7]; -fragment TIME_SNAP_UNIT: 'S' | 'SEC' | 'SECOND' - | 'M' | 'MIN' | 'MINUTE' - | 'H' | 'HR' | 'HOUR' | 'HOURS' - | 'D' | 'DAY' - | 'W' | 'WEEK' | WEEK_SNAP_UNIT - | 'MON' | 'MONTH' - | 'Q' | 'QTR' | 'QUARTER' - | 'Y' | 'YR' | 'YEAR'; +fragment TIME_SNAP_UNIT: COMMON_TIME_UNIT | WEEK_SNAP_UNIT | DAY_OR_DOUBLE; TIME_SNAP: AT TIME_SNAP_UNIT; ID: ID_LITERAL; CLUSTER: CLUSTER_PREFIX_LITERAL; INTEGER_LITERAL: DEC_DIGIT+; DECIMAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+; FLOAT_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+ 'F'; -DOUBLE_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+ 'D'; +DOUBLE_LITERAL: (DEC_DIGIT+)? '.' 
DEC_DIGIT+ DAY_OR_DOUBLE; fragment DATE_SUFFIX: ([\-.][*0-9]+)+; fragment CLUSTER_PREFIX_LITERAL: [*A-Z]+?[*A-Z_\-0-9]* COLON; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index cc5d3d4aa8d..00213f891f8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -332,7 +332,10 @@ timechartParameter spanLiteral : SPANLENGTH + | DECIMAL_SPANLENGTH + | DOUBLE_LITERAL // 1.5d can also represent decimal span length | INTEGER_LITERAL + | DECIMAL_LITERAL ; evalCommand diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 05029de875a..632f822d2fb 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -824,6 +824,15 @@ private List multiFieldRelevanceArguments( public UnresolvedExpression visitSpanLiteral(OpenSearchPPLParser.SpanLiteralContext ctx) { if (ctx.INTEGER_LITERAL() != null) { return AstDSL.intLiteral(Integer.parseInt(ctx.INTEGER_LITERAL().getText())); + } else if (ctx.DECIMAL_LITERAL() != null) { + return AstDSL.decimalLiteral(new BigDecimal(ctx.DECIMAL_LITERAL().getText())); + } else if (ctx.DECIMAL_SPANLENGTH() != null || ctx.DOUBLE_LITERAL() != null) { + throw new IllegalArgumentException( + StringUtils.format( + "Span length [%s] is invalid: floating-point time intervals are not supported.", + ctx.DECIMAL_SPANLENGTH() != null + ? ctx.DECIMAL_SPANLENGTH().getText() + : ctx.DOUBLE_LITERAL().getText())); } else { return AstDSL.stringLiteral(ctx.getText()); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index aea0a6e289d..808fbae9273 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1606,4 +1606,24 @@ public void testChartCommandWithBottomLimit() { exprList(argument("limit", intLiteral(3)), argument("top", booleanLiteral(false)))) .build()); } + + @Test + public void testTimeSpanWithDecimalShouldThrow() { + Throwable t1 = + assertThrows( + IllegalArgumentException.class, () -> plan("source=t | timechart span=1.5d count")); + assertTrue( + t1.getMessage() + .contains( + "Span length [1.5d] is invalid: floating-point time intervals are not supported.")); + + Throwable t2 = + assertThrows( + IllegalArgumentException.class, + () -> plan("source=t | stats count by span(@timestamp, 2.5y)")); + assertTrue( + t2.getMessage() + .contains( + "Span length [2.5y] is invalid: floating-point time intervals are not supported.")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index c5accc7f7ca..c83b7dfecb6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -1656,6 +1656,19 @@ public void testVisitSpanLiteral() { exprList( argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) .build()); + + // Test span literal with decimal value + assertEqual( + "source=events_null | bin cpu_usage span=7.5 | stats count() by cpu_usage", + agg( + bin( + relation("events_null"), + field("cpu_usage"), + 
argument("span", decimalLiteral(new java.math.BigDecimal("7.5")))), + exprList(alias("count()", aggregate("count", allFields()))), + emptyList(), + exprList(alias("cpu_usage", field("cpu_usage"))), + defaultStatsArgs())); } @Test From 16281d7a89c70dd32fda14a788350ce4bc3439bf Mon Sep 17 00:00:00 2001 From: Kai Huang <105710027+ahkcs@users.noreply.github.com> Date: Thu, 13 Nov 2025 11:12:48 -0800 Subject: [PATCH 58/99] Support mvindex eval function (#4794) --- .../function/BuiltinFunctionName.java | 2 + .../CollectionUDF/MVIndexFunctionImp.java | 119 ++++++++++++++++++ .../expression/function/PPLFuncImpTable.java | 16 +++ docs/user/ppl/functions/collection.rst | 55 ++++++++ .../remote/CalciteArrayFunctionIT.java | 119 ++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../calcite/CalcitePPLArrayFunctionTest.java | 108 ++++++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 13 ++ 9 files changed, 434 insertions(+) create mode 100644 core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVIndexFunctionImp.java diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 0fe042a5155..656bc018e50 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -68,11 +68,13 @@ public enum BuiltinFunctionName { /** Collection functions */ ARRAY(FunctionName.of("array")), ARRAY_LENGTH(FunctionName.of("array_length")), + ARRAY_SLICE(FunctionName.of("array_slice"), true), MAP_APPEND(FunctionName.of("map_append"), true), MAP_CONCAT(FunctionName.of("map_concat"), true), MAP_REMOVE(FunctionName.of("map_remove"), true), MVAPPEND(FunctionName.of("mvappend")), MVJOIN(FunctionName.of("mvjoin")), + MVINDEX(FunctionName.of("mvindex")), FORALL(FunctionName.of("forall")), EXISTS(FunctionName.of("exists")), FILTER(FunctionName.of("filter")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVIndexFunctionImp.java b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVIndexFunctionImp.java new file mode 100644 index 00000000000..24e4b489632 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVIndexFunctionImp.java @@ -0,0 +1,119 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.CollectionUDF; + +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDFUNCTION; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_SLICE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.IF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_ITEM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.LESS; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SUBTRACT; + +import java.math.BigDecimal; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.opensearch.sql.expression.function.PPLFuncImpTable; + +/** + * MVINDEX function implementation that returns a subset of a multivalue array. + * + *
<p>Usage:
+ *
+ * <ul>
+ *   <li>mvindex(array, index) - returns the single element at the given 0-based index
+ *   <li>mvindex(array, start, end) - returns the elements from start to end, inclusive
+ * </ul>
+ *
+ * <p>Supports negative indexing where -1 refers to the last element.
+ *
+ * <p>Implementation notes:
+ *
+ * <ul>
+ *   <li>Single-element access delegates to Calcite's ITEM operator (1-based indexing)
+ *   <li>Range access delegates to Calcite's ARRAY_SLICE operator (0-based start index plus a length)
+ * </ul>
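+ *
+ * <p>Examples (as documented for the PPL {@code mvindex} function in this patch):
+ *
+ * <pre>
+ *   mvindex(array('a','b','c','d','e'), 1)    returns 'b'
+ *   mvindex(array('a','b','c','d','e'), -1)   returns 'e'
+ *   mvindex(array(1, 2, 3, 4, 5), 1, 3)       returns [2, 3, 4]
+ * </pre>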
+ */ +public class MVIndexFunctionImp implements PPLFuncImpTable.FunctionImp { + + @Override + public RexNode resolve(RexBuilder builder, RexNode... args) { + RexNode array = args[0]; + RexNode startIdx = args[1]; + + // Use resolve to get array length instead of direct makeCall + RexNode arrayLen = PPLFuncImpTable.INSTANCE.resolve(builder, ARRAY_LENGTH, array); + + if (args.length == 2) { + // Single element access using ITEM (1-based indexing) + return resolveSingleElement(builder, array, startIdx, arrayLen); + } else { + // Range access using ARRAY_SLICE (0-based indexing) + RexNode endIdx = args[2]; + return resolveRange(builder, array, startIdx, endIdx, arrayLen); + } + } + + /** + * Resolves single element access: mvindex(array, index) + * + *
<p>
Uses Calcite's ITEM operator which uses 1-based indexing. Converts PPL's 0-based index to + * 1-based by adding 1. + */ + private RexNode resolveSingleElement( + RexBuilder builder, RexNode array, RexNode startIdx, RexNode arrayLen) { + // Convert 0-based PPL index to 1-based Calcite ITEM index + RexNode zero = builder.makeExactLiteral(BigDecimal.ZERO); + RexNode one = builder.makeExactLiteral(BigDecimal.ONE); + + RexNode isNegative = PPLFuncImpTable.INSTANCE.resolve(builder, LESS, startIdx, zero); + RexNode sumArrayLenStart = + PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, arrayLen, startIdx); + RexNode negativeCase = + PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, sumArrayLenStart, one); + RexNode positiveCase = PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, startIdx, one); + + RexNode normalizedStart = + PPLFuncImpTable.INSTANCE.resolve(builder, IF, isNegative, negativeCase, positiveCase); + + return PPLFuncImpTable.INSTANCE.resolve(builder, INTERNAL_ITEM, array, normalizedStart); + } + + /** + * Resolves range access: mvindex(array, start, end) + * + *
<p>
Uses Calcite's ARRAY_SLICE operator which uses 0-based indexing and a length parameter. + * PPL's end index is inclusive, so length = (end - start) + 1. + */ + private RexNode resolveRange( + RexBuilder builder, RexNode array, RexNode startIdx, RexNode endIdx, RexNode arrayLen) { + // Normalize negative indices for ARRAY_SLICE (0-based) + RexNode zero = builder.makeExactLiteral(BigDecimal.ZERO); + RexNode one = builder.makeExactLiteral(BigDecimal.ONE); + + RexNode isStartNegative = PPLFuncImpTable.INSTANCE.resolve(builder, LESS, startIdx, zero); + RexNode startNegativeCase = + PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, arrayLen, startIdx); + RexNode normalizedStart = + PPLFuncImpTable.INSTANCE.resolve(builder, IF, isStartNegative, startNegativeCase, startIdx); + + RexNode isEndNegative = PPLFuncImpTable.INSTANCE.resolve(builder, LESS, endIdx, zero); + RexNode endNegativeCase = + PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, arrayLen, endIdx); + RexNode normalizedEnd = + PPLFuncImpTable.INSTANCE.resolve(builder, IF, isEndNegative, endNegativeCase, endIdx); + + // Calculate length: (normalizedEnd - normalizedStart) + 1 + RexNode diff = + PPLFuncImpTable.INSTANCE.resolve(builder, SUBTRACT, normalizedEnd, normalizedStart); + RexNode length = PPLFuncImpTable.INSTANCE.resolve(builder, ADDFUNCTION, diff, one); + + // Call ARRAY_SLICE(array, normalizedStart, length) + return PPLFuncImpTable.INSTANCE.resolve(builder, ARRAY_SLICE, array, normalizedStart, length); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 76a6755ad52..b9ecb9202d0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -17,6 +17,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.AND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_SLICE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASCII; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ATAN; @@ -149,6 +150,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVAPPEND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVINDEX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL; @@ -283,6 +285,7 @@ import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.executor.QueryType; +import org.opensearch.sql.expression.function.CollectionUDF.MVIndexFunctionImp; public class PPLFuncImpTable { private static final Logger logger = LogManager.getLogger(PPLFuncImpTable.class); @@ -974,12 +977,25 @@ void populate() { 
builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter), PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER)); + // Register MVINDEX to use Calcite's ITEM/ARRAY_SLICE with index normalization + register( + MVINDEX, + new MVIndexFunctionImp(), + PPLTypeChecker.wrapComposite( + (CompositeOperandTypeChecker) + OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER) + .or( + OperandTypes.family( + SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER)), + false)); + registerOperator(ARRAY, PPLBuiltinOperators.ARRAY); registerOperator(MVAPPEND, PPLBuiltinOperators.MVAPPEND); registerOperator(MAP_APPEND, PPLBuiltinOperators.MAP_APPEND); registerOperator(MAP_CONCAT, SqlLibraryOperators.MAP_CONCAT); registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE); registerOperator(ARRAY_LENGTH, SqlLibraryOperators.ARRAY_LENGTH); + registerOperator(ARRAY_SLICE, SqlLibraryOperators.ARRAY_SLICE); registerOperator(FORALL, PPLBuiltinOperators.FORALL); registerOperator(EXISTS, PPLBuiltinOperators.EXISTS); registerOperator(FILTER, PPLBuiltinOperators.FILTER); diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst index 76931e53876..5c2b7c30f74 100644 --- a/docs/user/ppl/functions/collection.rst +++ b/docs/user/ppl/functions/collection.rst @@ -301,3 +301,58 @@ Example:: |--------------| | [1,text,2.5] | +--------------+ + +MVINDEX +------- + +Description +>>>>>>>>>>> + +Usage: mvindex(array, start, [end]) returns a subset of the multivalue array using the start and optional end index values. Indexes are 0-based (first element is at index 0). Supports negative indexing where -1 refers to the last element. When only start is provided, returns a single element. When both start and end are provided, returns an array of elements from start to end (inclusive). 
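+
+For example, given ``array('a','b','c','d','e')``, ``mvindex(array, 1)`` returns ``b``, ``mvindex(array, -1)`` returns ``e``, and ``mvindex(array, 1, 3)`` returns ``[b,c,d]``.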
+ +Argument type: array: ARRAY, start: INTEGER, end: INTEGER (optional) + +Return type: ANY (single element) or ARRAY (range) + +Example:: + + os> source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, 1) | fields result | head 1 + fetched rows / total rows = 1/1 + +--------+ + | result | + |--------| + | b | + +--------+ + + os> source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, -1) | fields result | head 1 + fetched rows / total rows = 1/1 + +--------+ + | result | + |--------| + | e | + +--------+ + + os> source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, 1, 3) | fields result | head 1 + fetched rows / total rows = 1/1 + +---------+ + | result | + |---------| + | [2,3,4] | + +---------+ + + os> source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, -3, -1) | fields result | head 1 + fetched rows / total rows = 1/1 + +---------+ + | result | + |---------| + | [3,4,5] | + +---------+ + + os> source=people | eval array = array('alex', 'celestino', 'claudia', 'david'), result = mvindex(array, 0, 2) | fields result | head 1 + fetched rows / total rows = 1/1 + +--------------------------+ + | result | + |--------------------------| + | [alex,celestino,claudia] | + +--------------------------+ + diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java index 458158c45d4..c829565768f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java @@ -370,4 +370,123 @@ public void testMvjoinWithMultipleRealFields() throws IOException { firstRow.getString(0) + " | " + firstRow.getString(1) + " | " + firstRow.getString(2), firstRow.getString(3)); } + + @Test + public void testMvindexSingleElementPositive() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('a', 'b', 'c', 'd', 'e'), result = mvindex(arr, 1)" + + " | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("b")); + } + + @Test + public void testMvindexSingleElementNegative() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('a', 'b', 'c', 'd', 'e'), result = mvindex(arr, -1)" + + " | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("e")); + } + + @Test + public void testMvindexSingleElementNegativeMiddle() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('a', 'b', 'c', 'd', 'e'), result = mvindex(arr, -3)" + + " | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "string")); + verifyDataRows(actual, rows("c")); + } + + @Test + public void testMvindexRangePositive() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, 1, 3) | head" + + " 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(2, 3, 4))); + } + + @Test + public void testMvindexRangeNegative() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | 
eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, -3, -1) |" + + " head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(3, 4, 5))); + } + + @Test + public void testMvindexRangeMixed() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, -4, 2) | head" + + " 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(2, 3))); + } + + @Test + public void testMvindexRangeFirstThree() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('alex', 'celestino', 'claudia', 'david'), result =" + + " mvindex(arr, 0, 2) | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of("alex", "celestino", "claudia"))); + } + + @Test + public void testMvindexRangeLastThree() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('buttercup', 'dash', 'flutter', 'honey', 'ivory'," + + " 'minty', 'pinky', 'rarity'), result = mvindex(arr, -3, -1) | head 1 |" + + " fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of("minty", "pinky", "rarity"))); + } + + @Test + public void testMvindexRangeSingleElement() throws IOException { + // When start == end, should return single element in array + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, 2, 2) | head" + + " 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(3))); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index d0bf12ee9e3..370aaad56c0 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -442,6 +442,7 @@ ARRAY: 'ARRAY'; ARRAY_LENGTH: 'ARRAY_LENGTH'; MVAPPEND: 'MVAPPEND'; MVJOIN: 'MVJOIN'; +MVINDEX: 'MVINDEX'; FORALL: 'FORALL'; FILTER: 'FILTER'; TRANSFORM: 'TRANSFORM'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 00213f891f8..70a9eff0fe3 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1094,6 +1094,7 @@ collectionFunctionName | ARRAY_LENGTH | MVAPPEND | MVJOIN + | MVINDEX | FORALL | EXISTS | FILTER diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java index cd98e18e4be..4bab03527c1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -106,4 +106,112 @@ public void testMvjoinWithFieldReference() { + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testMvindexSingleElementPositive() { + String ppl = + "source=EMP | eval arr = array('a', 'b', 'c'), result = mvindex(arr, 1) | head 1 |" + + " fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], 
HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array('a', 'b', 'c')]," + + " result=[ITEM(array('a', 'b', 'c'), +(1, 1))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "result=b\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `array`('a', 'b', 'c')[1 + 1] `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvindexSingleElementNegative() { + String ppl = + "source=EMP | eval arr = array('a', 'b', 'c'), result = mvindex(arr, -1) | head 1 |" + + " fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array('a', 'b', 'c')]," + + " result=[ITEM(array('a', 'b', 'c'), +(+(ARRAY_LENGTH(array('a', 'b', 'c'))," + + " -1), 1))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "result=c\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `array`('a', 'b', 'c')[ARRAY_LENGTH(`array`('a', 'b', 'c')) + -1 + 1]" + + " `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvindexRangePositive() { + String ppl = + "source=EMP | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, 1, 3) | head 1 |" + + " fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array(1, 2, 3, 4, 5)]," + + " result=[ARRAY_SLICE(array(1, 2, 3, 4, 5), 1, +(-(3, 1), 1))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "result=[2, 3, 4]\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_SLICE(`array`(1, 2, 3, 4, 5), 1, 3 - 1 + 1) `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvindexRangeNegative() { + String ppl = + "source=EMP | eval arr = array(1, 2, 3, 4, 5), result = mvindex(arr, -3, -1) | head 1 |" + + " fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array(1, 2, 3, 4, 5)]," + + " result=[ARRAY_SLICE(array(1, 2, 3, 4, 5), +(ARRAY_LENGTH(array(1, 2, 3, 4, 5))," + + " -3), +(-(+(ARRAY_LENGTH(array(1, 2, 3, 4, 5)), -1)," + + " +(ARRAY_LENGTH(array(1, 2, 3, 4, 5)), -3)), 1))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "result=[3, 4, 5]\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_SLICE(`array`(1, 2, 3, 4, 5), ARRAY_LENGTH(`array`(1, 2, 3, 4, 5)) + -3," + + " ARRAY_LENGTH(`array`(1, 2, 3, 4, 5)) + -1 - (ARRAY_LENGTH(`array`(1, 2, 3, 4, 5))" + + " + -3) + 1) `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git 
a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1a150439980..f205b9fe0cc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -809,6 +809,19 @@ public void testMvappend() { anonymize("source=t | eval result=mvappend(a, 'b', 'c') | fields result")); } + @Test + public void testMvindex() { + // Test mvindex with single element access + assertEquals( + "source=table | eval identifier=mvindex(array(***,***,***),***) | fields + identifier", + anonymize("source=t | eval result=mvindex(array('a', 'b', 'c'), 1) | fields result")); + // Test mvindex with range access + assertEquals( + "source=table | eval identifier=mvindex(array(***,***,***,***,***),***,***) | fields +" + + " identifier", + anonymize("source=t | eval result=mvindex(array(1, 2, 3, 4, 5), 1, 3) | fields result")); + } + @Test public void testRexWithOffsetField() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); From 2fc072d6b08d401f9d96db4c00bc2c33c9b19ad1 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Fri, 14 Nov 2025 09:05:19 +0800 Subject: [PATCH 59/99] Fix function identify problem in converting to sql dialect (#4793) --- .../function/PPLBuiltinOperators.java | 8 +-- .../expression/function/PPLFuncImpTable.java | 4 +- .../function/UserDefinedFunctionBuilder.java | 7 +++ ...axFunction.java => ScalarMaxFunction.java} | 4 +- ...inFunction.java => ScalarMinFunction.java} | 4 +- .../sql/calcite/remote/CalciteExplainIT.java | 12 ++-- .../calcite/explain_eval_max.json | 6 -- .../calcite/explain_eval_max.yaml | 8 +++ .../calcite/explain_eval_min.json | 6 -- .../calcite/explain_eval_min.yaml | 8 +++ .../calcite_no_pushdown/explain_eval_max.json | 6 -- .../calcite_no_pushdown/explain_eval_max.yaml | 9 +++ .../calcite_no_pushdown/explain_eval_min.json | 6 -- .../calcite_no_pushdown/explain_eval_min.yaml | 9 +++ .../calcite/CalcitePPLAggregationTest.java | 16 +++--- .../calcite/CalcitePPLArrayFunctionTest.java | 8 +-- .../sql/ppl/calcite/CalcitePPLBasicTest.java | 10 ++-- .../sql/ppl/calcite/CalcitePPLBinTest.java | 14 ++--- .../sql/ppl/calcite/CalcitePPLChartTest.java | 10 ++-- .../ppl/calcite/CalcitePPLCountEvalTest.java | 2 +- .../CalcitePPLDateTimeFunctionTest.java | 2 +- .../sql/ppl/calcite/CalcitePPLEvalTest.java | 42 +++++++------- .../sql/ppl/calcite/CalcitePPLGrokTest.java | 4 +- .../calcite/CalcitePPLMathFunctionTest.java | 2 +- .../calcite/CalcitePPLMultisearchTest.java | 4 +- .../sql/ppl/calcite/CalcitePPLParseTest.java | 4 +- .../ppl/calcite/CalcitePPLPatternsTest.java | 36 ++++++------ .../sql/ppl/calcite/CalcitePPLRexTest.java | 44 +++++++-------- .../sql/ppl/calcite/CalcitePPLSearchTest.java | 4 +- .../sql/ppl/calcite/CalcitePPLSpathTest.java | 4 +- .../calcite/CalcitePPLStringFunctionTest.java | 14 ++--- .../ppl/calcite/CalcitePPLTimechartTest.java | 56 +++++++++---------- 32 files changed, 196 insertions(+), 177 deletions(-) rename core/src/main/java/org/opensearch/sql/expression/function/udf/math/{MaxFunction.java => ScalarMaxFunction.java} (95%) rename core/src/main/java/org/opensearch/sql/expression/function/udf/math/{MinFunction.java => ScalarMinFunction.java} (95%) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.json create mode 100644 
integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 26960192f08..ac80cf544ae 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -99,10 +99,10 @@ import org.opensearch.sql.expression.function.udf.math.ConvFunction; import org.opensearch.sql.expression.function.udf.math.DivideFunction; import org.opensearch.sql.expression.function.udf.math.EulerFunction; -import org.opensearch.sql.expression.function.udf.math.MaxFunction; -import org.opensearch.sql.expression.function.udf.math.MinFunction; import org.opensearch.sql.expression.function.udf.math.ModFunction; import org.opensearch.sql.expression.function.udf.math.NumberToStringFunction; +import org.opensearch.sql.expression.function.udf.math.ScalarMaxFunction; +import org.opensearch.sql.expression.function.udf.math.ScalarMinFunction; /** Defines functions and operators that are implemented only by PPL */ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { @@ -132,8 +132,8 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator DIVIDE = new DivideFunction().toUDF("DIVIDE"); public static final SqlOperator SHA2 = CryptographicFunction.sha2().toUDF("SHA2"); public static final SqlOperator CIDRMATCH = new CidrMatchFunction().toUDF("CIDRMATCH"); - public static final SqlOperator MAX = new MaxFunction().toUDF("MAX"); - public static final SqlOperator MIN = new MinFunction().toUDF("MIN"); + public static final SqlOperator SCALAR_MAX = new ScalarMaxFunction().toUDF("SCALAR_MAX"); + public static final SqlOperator SCALAR_MIN = new ScalarMinFunction().toUDF("SCALAR_MIN"); public static final SqlOperator COSH = adaptMathFunctionToUDF( diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index b9ecb9202d0..06162a86813 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -851,8 +851,8 @@ void populate() { registerOperator(INTERNAL_TRANSLATE3, SqlLibraryOperators.TRANSLATE3); // Register eval functions for PPL max() and min() calls - registerOperator(MAX, PPLBuiltinOperators.MAX); - registerOperator(MIN, PPLBuiltinOperators.MIN); + registerOperator(MAX, PPLBuiltinOperators.SCALAR_MAX); + registerOperator(MIN, PPLBuiltinOperators.SCALAR_MIN); // Register PPL UDF operator registerOperator(COSH, PPLBuiltinOperators.COSH); diff --git 
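A note on the rename above: the PPL surface syntax is untouched; eval's variadic max()/min() still parse as the MAX/MIN builtin names and are simply registered to the renamed SCALAR_MAX/SCALAR_MIN operators. What changes is the name that reaches plans and generated SQL, where a bare MAX(...)/MIN(...) reads as the SQL aggregate. A hedged sketch in the style of the eval tests later in this patch (the method name and the exact expected string are illustrative, not from the patch):

@Test
public void scalarMaxUnparsesWithoutAggregateCollision() {
  RelNode root = getRelNode("source=EMP | eval a = max(5, 30, DEPTNO) | fields a");
  // Before this patch the plan carried MAX(...), which unparsed like the aggregate;
  // after it, the scalar operator keeps a distinct SCALAR_MAX name end to end.
  verifyPPLToSparkSQL(root, "SELECT SCALAR_MAX(5, 30, `DEPTNO`) `a`\nFROM `scott`.`EMP`");
}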
a/core/src/main/java/org/opensearch/sql/expression/function/UserDefinedFunctionBuilder.java b/core/src/main/java/org/opensearch/sql/expression/function/UserDefinedFunctionBuilder.java index 30dd644fc76..f52c6b69f0e 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/UserDefinedFunctionBuilder.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/UserDefinedFunctionBuilder.java @@ -59,6 +59,13 @@ default SqlUserDefinedFunction toUDF(String functionName, boolean isDeterministi public boolean isDeterministic() { return isDeterministic; } + + @Override + public SqlIdentifier getSqlIdentifier() { + // Return null to avoid unparsing the function as a quoted identifier when converting to a SQL dialect; a null identifier makes the name unparse as a bare keyword instead. + // See SqlUtil.unparseFunctionSyntax() for the relevant branch. + return null; + } }; } } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/ScalarMaxFunction.java similarity index 95% rename from core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java rename to core/src/main/java/org/opensearch/sql/expression/function/udf/math/ScalarMaxFunction.java index 70e17cd13b4..9b4b0b48c73 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MaxFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/ScalarMaxFunction.java @@ -24,9 +24,9 @@ * MAX(value1, value2, ...) returns the maximum value from the arguments. For mixed types, strings * have higher precedence than numbers. */ -public class MaxFunction extends ImplementorUDF { - - public MaxFunction() { +public class ScalarMaxFunction extends ImplementorUDF { + + public ScalarMaxFunction() { super(new MaxImplementor(), NullPolicy.ALL); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/ScalarMinFunction.java similarity index 95% rename from core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java rename to core/src/main/java/org/opensearch/sql/expression/function/udf/math/ScalarMinFunction.java index 2bb00ce05e6..441257a422e 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/math/MinFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/math/ScalarMinFunction.java @@ -24,9 +24,9 @@ * MIN(value1, value2, ...) returns the minimum value from the arguments. For mixed types, numbers * have higher precedence than strings.
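* <p>An illustrative reading of that rule (values here are hypothetical, not from this patch): SCALAR_MIN(5, 30) evaluates to 5, and SCALAR_MIN(5, 'banana') also evaluates to 5, since numbers take precedence for the minimum; symmetrically, strings take precedence for the maximum, so SCALAR_MAX(5, 'banana') evaluates to 'banana'.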
*/ -public class MinFunction extends ImplementorUDF { +public class ScalarMinFunction extends ImplementorUDF { - public MinFunction() { + public ScalarMinFunction() { super(new MinImplementor(), NullPolicy.ALL); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 52fb467f5bc..85e39dc062e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1300,19 +1300,19 @@ public void testExplainSortOnMeasureMultiBucketsNotMultiTermsNotPushDown() throw @Test public void testExplainEvalMax() throws IOException { - String expected = loadExpectedPlan("explain_eval_max.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_eval_max.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | eval new = max(1, 2, 3, age, 'banana')")); } @Test public void testExplainEvalMin() throws IOException { - String expected = loadExpectedPlan("explain_eval_min.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_eval_min.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | eval new = min(1, 2, 3, age, 'banana')")); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.json deleted file mode 100644 index 6140bebcc40..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MAX(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[MAX($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], $f11=[$t15])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml new file mode 100644 index 00000000000..bd517738a0b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_max.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], 
lastname=[$10], new=[SCALAR_MAX(1, 2, 3, $8, 'banana':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[SCALAR_MAX($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], $f11=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.json deleted file mode 100644 index b5e85f0eb58..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MIN(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[MIN($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], $f11=[$t15])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml new file mode 100644 index 00000000000..71e2911eb09 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[SCALAR_MIN(1, 2, 3, $8, 'banana':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[SCALAR_MIN($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], $f11=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.json deleted file mode 100644 index 5c366156f17..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MAX(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[2], expr#19=[3], expr#20=['banana':VARCHAR], expr#21=[MAX($t17, $t18, $t19, $t8, $t20)], proj#0..10=[{exprs}], new=[$t21])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.yaml new file mode 100644 index 00000000000..cae22c2a7f4 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_max.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[SCALAR_MAX(1, 2, 3, $8, 'banana':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[2], expr#19=[3], expr#20=['banana':VARCHAR], expr#21=[SCALAR_MAX($t17, $t18, $t19, $t8, $t20)], proj#0..10=[{exprs}], new=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.json deleted file mode 100644 index 5d6872ca060..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[MIN(1, 2, 3, $8, 'banana':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[2], expr#19=[3], expr#20=['banana':VARCHAR], expr#21=[MIN($t17, $t18, $t19, $t8, $t20)], proj#0..10=[{exprs}], new=[$t21])\n 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml new file mode 100644 index 00000000000..e7c0af846f0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], new=[SCALAR_MIN(1, 2, 3, $8, 'banana':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[2], expr#19=[3], expr#20=['banana':VARCHAR], expr#21=[SCALAR_MIN($t17, $t18, $t19, $t8, $t20)], proj#0..10=[{exprs}], new=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index 1446c7b0470..81597c65bbe 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -447,9 +447,9 @@ public void testAvgBySpanAndFields() { String expectedSparkSql = "" - + "SELECT AVG(`SAL`) `avg(SAL)`, `SPAN`(`EMPNO`, 500, NULL) `empno_span`, `DEPTNO`\n" + + "SELECT AVG(`SAL`) `avg(SAL)`, SPAN(`EMPNO`, 500, NULL) `empno_span`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" - + "GROUP BY `DEPTNO`, `SPAN`(`EMPNO`, 500, NULL)\n" + + "GROUP BY `DEPTNO`, SPAN(`EMPNO`, 500, NULL)\n" + "ORDER BY `DEPTNO`, 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -470,10 +470,10 @@ public void testAvgByTimeSpanAndFields() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT AVG(`SAL`) `avg(SAL)`, `SPAN`(`HIREDATE`, 1, 'y') `hiredate_span`, `DEPTNO`\n" + "SELECT AVG(`SAL`) `avg(SAL)`, SPAN(`HIREDATE`, 1, 'y') `hiredate_span`, `DEPTNO`\n" + "FROM `scott`.`EMP`\n" + "WHERE `HIREDATE` IS NOT NULL\n" - + "GROUP BY `DEPTNO`, `SPAN`(`HIREDATE`, 1, 'y')\n" + + "GROUP BY `DEPTNO`, SPAN(`HIREDATE`, 1, 'y')\n" + "ORDER BY `DEPTNO`, 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -596,9 +596,9 @@ public void testStddevSampByFieldWithAlias() { String expectedSparkSql = "" - + "SELECT STDDEV_SAMP(`SAL`) `samp`, `SPAN`(`EMPNO`, 100, NULL) `empno_span`\n" + + "SELECT STDDEV_SAMP(`SAL`) `samp`, SPAN(`EMPNO`, 100, NULL) `empno_span`\n" + "FROM `scott`.`EMP`\n" - + "GROUP BY `SPAN`(`EMPNO`, 100, NULL)"; + + "GROUP BY SPAN(`EMPNO`, 100, NULL)"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -982,10 +982,10 @@ public void testSortAggregationMetrics2() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT AVG(`SAL`) `avg`, `SPAN`(`HIREDATE`, 1, 'y') `hiredate_span`\n" + "SELECT AVG(`SAL`) `avg`, SPAN(`HIREDATE`, 1, 'y') `hiredate_span`\n" + "FROM `scott`.`EMP`\n" + "WHERE `HIREDATE` IS NOT NULL\n" - + "GROUP BY `SPAN`(`HIREDATE`, 1, 'y')\n" + + "GROUP BY SPAN(`HIREDATE`, 1, 'y')\n" + "ORDER BY 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git 
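The backtick churn in the test expectations that follow (SPAN, DIVIDE, GROK, PARSE, REX_EXTRACT, TOSTRING, and friends losing their quoting) is all downstream of the getSqlIdentifier() override earlier in this patch. Calcite's SqlUtil.unparseFunctionSyntax() routes a function name through the dialect's identifier quoting only when the operator carries a SqlIdentifier; with a null identifier it falls back to printing a bare keyword. A simplified sketch of that branch, not the verbatim Calcite source:

import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlWriter;

final class FunctionNameUnparseSketch {
  static void unparseName(SqlWriter writer, SqlOperator operator) {
    if (operator instanceof SqlFunction
        && ((SqlFunction) operator).getSqlIdentifier() != null) {
      // Quoted per dialect, e.g. `SPAN`(...) in the old Spark SQL expectations.
      ((SqlFunction) operator).getSqlIdentifier().unparse(writer, 0, 0);
    } else {
      // Bare keyword, e.g. SPAN(...), which the updated expectations assert.
      writer.keyword(operator.getName());
    }
  }
}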
a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java index 4bab03527c1..4dec305d28c 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -33,7 +33,7 @@ public void testMvjoinWithStringArray() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT ARRAY_JOIN(`array`('a', 'b', 'c'), ',') `joined`\n" + "SELECT ARRAY_JOIN(ARRAY('a', 'b', 'c'), ',') `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -59,7 +59,7 @@ public void testMvjoinWithDifferentDelimiter() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT ARRAY_JOIN(`array`('apple', 'banana', 'cherry'), ' | ') `joined`\n" + "SELECT ARRAY_JOIN(ARRAY('apple', 'banana', 'cherry'), ' | ') `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -82,7 +82,7 @@ public void testMvjoinWithEmptyArray() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT ARRAY_JOIN(`array`(), ',') `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + "SELECT ARRAY_JOIN(ARRAY(), ',') `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -101,7 +101,7 @@ public void testMvjoinWithFieldReference() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT ARRAY_JOIN(`array`(`ENAME`, `JOB`), '-') `joined`\n" + "SELECT ARRAY_JOIN(ARRAY(`ENAME`, `JOB`), '-') `joined`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java index 26783296f1c..784fedc2ede 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBasicTest.java @@ -493,11 +493,11 @@ public void testDecimalLiteral() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `DIVIDE`(22, 7.0) `r1`, `DIVIDE`(22, 7.0E0) `r2`, `DIVIDE`(22.0, 7) `r3`," - + " `DIVIDE`(2.20E1, 7) `r4`, 0.1 * 0.2 `r5`, 1E-1 * 2E-1 `r6`, 0.1 + 0.2 `r7`, 1E-1 +" - + " 2E-1 `r8`, 0.06 - 0.01 `r9`, 6E-2 - 1E-2 `r10`, `DIVIDE`(0.1, 0.3) * 0.3 `r11`," - + " `DIVIDE`(1E-1, 3E-1) * 3E-1 `r12`, POWER(POWER(2.0, 5E-1), 2) `r13`," - + " POWER(POWER(2.0E0, 5E-1), 2) `r14`, `DIVIDE`(7.0, 0) `r15`, `DIVIDE`(7, 0.0)" + "SELECT DIVIDE(22, 7.0) `r1`, DIVIDE(22, 7.0E0) `r2`, DIVIDE(22.0, 7) `r3`," + + " DIVIDE(2.20E1, 7) `r4`, 0.1 * 0.2 `r5`, 1E-1 * 2E-1 `r6`, 0.1 + 0.2 `r7`, 1E-1 +" + + " 2E-1 `r8`, 0.06 - 0.01 `r9`, 6E-2 - 1E-2 `r10`, DIVIDE(0.1, 0.3) * 0.3 `r11`," + + " DIVIDE(1E-1, 3E-1) * 3E-1 `r12`, POWER(POWER(2.0, 5E-1), 2) `r13`," + + " POWER(POWER(2.0E0, 5E-1), 2) `r14`, DIVIDE(7.0, 0) `r15`, DIVIDE(7, 0.0)" + " `r16`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java index 0d933fe1649..27d689cc5d6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLBinTest.java @@ -31,7 +31,7 @@ public void testBinWithSpan() { verifyLogical(root, expectedLogical); 
String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `SPAN_BUCKET`(`SAL`," + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, SPAN_BUCKET(`SAL`," + " 1000) `SAL`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -52,7 +52,7 @@ public void testBinWithBins() { + " LogicalTableScan(table=[[scott, EMP]])\n"); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `WIDTH_BUCKET`(`SAL`," + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, WIDTH_BUCKET(`SAL`," + " 10, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) -" + " (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))," + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))" @@ -78,7 +78,7 @@ public void testBinWithMinspan() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`," - + " `MINSPAN_BUCKET`(`SAL`, 1.000E2, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + + " MINSPAN_BUCKET(`SAL`, 1.000E2, (MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + " PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + " PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED" + " PRECEDING AND UNBOUNDED FOLLOWING)) `SAL`\n" @@ -101,7 +101,7 @@ public void testBinWithStartEnd() { + " LogicalTableScan(table=[[scott, EMP]])\n"); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, `RANGE_BUCKET`(`SAL`," + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `COMM`, `DEPTNO`, RANGE_BUCKET(`SAL`," + " MIN(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + " MAX(`SAL`) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), 1000," + " 5000) `SAL`\n" @@ -124,7 +124,7 @@ public void testBinWithTimestampFieldUsingBins() { + " LogicalTableScan(table=[[scott, products_temporal]])\n"); String expectedSparkSql = - "SELECT `ID`, `SUPPLIER`, `SYS_END`, `WIDTH_BUCKET`(`SYS_START`, 10, (MAX(`SYS_START`) OVER" + "SELECT `ID`, `SUPPLIER`, `SYS_END`, WIDTH_BUCKET(`SYS_START`, 10, (MAX(`SYS_START`) OVER" + " (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) - (MIN(`SYS_START`)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)), MAX(`SYS_START`)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) `SYS_START`\n" @@ -148,7 +148,7 @@ public void testBinWithTimeSpan() { verifyPPLToSparkSQL( root, "SELECT `ID`, `SUPPLIER`, `SYS_END`," - + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + + " FROM_UNIXTIME(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP(`SYS_START`), 3600), 1)) *" + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } @@ -170,7 +170,7 @@ public void testBinWithAligntime() { verifyPPLToSparkSQL( root, "SELECT `ID`, `SUPPLIER`, `SYS_END`," - + " `FROM_UNIXTIME`(FLOOR(`DIVIDE`(`DIVIDE`(`UNIX_TIMESTAMP`(`SYS_START`), 3600), 1)) *" + + " FROM_UNIXTIME(FLOOR(DIVIDE(DIVIDE(UNIX_TIMESTAMP(`SYS_START`), 3600), 1)) *" + " 3600) `SYS_START`\n" + "FROM `scott`.`products_temporal`"); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java index bddcde11e18..107fe7b9cb1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java +++ 
b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartTest.java @@ -192,10 +192,10 @@ public void testChartWithSpan() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `SPAN`(`age`, 10, NULL) `age`, MAX(`balance`) `max(balance)`\n" + "SELECT SPAN(`age`, 10, NULL) `age`, MAX(`balance`) `max(balance)`\n" + "FROM `scott`.`bank`\n" + "WHERE `age` IS NOT NULL AND `balance` IS NOT NULL\n" - + "GROUP BY `SPAN`(`age`, 10, NULL)\n" + + "GROUP BY SPAN(`age`, 10, NULL)\n" + "ORDER BY 1 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -209,17 +209,17 @@ public void testChartWithTimeSpan() { "SELECT `t2`.`timestamp`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END `category`," + " MAX(`t2`.`max(value)`) `max(value)`\n" - + "FROM (SELECT `SPAN`(`timestamp`, 1, 'w') `timestamp`, `category`, MAX(`value`)" + + "FROM (SELECT SPAN(`timestamp`, 1, 'w') `timestamp`, `category`, MAX(`value`)" + " `max(value)`\n" + "FROM `scott`.`time_data`\n" + "WHERE `timestamp` IS NOT NULL AND `value` IS NOT NULL\n" - + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t2`\n" + + "GROUP BY `category`, SPAN(`timestamp`, 1, 'w')) `t2`\n" + "LEFT JOIN (SELECT `category`, SUM(`max(value)`) `__grand_total__`, ROW_NUMBER() OVER" + " (ORDER BY SUM(`max(value)`) DESC) `_row_number_chart_`\n" + "FROM (SELECT `category`, MAX(`value`) `max(value)`\n" + "FROM `scott`.`time_data`\n" + "WHERE `timestamp` IS NOT NULL AND `value` IS NOT NULL\n" - + "GROUP BY `category`, `SPAN`(`timestamp`, 1, 'w')) `t6`\n" + + "GROUP BY `category`, SPAN(`timestamp`, 1, 'w')) `t6`\n" + "WHERE `category` IS NOT NULL\n" + "GROUP BY `category`) `t9` ON `t2`.`category` = `t9`.`category`\n" + "GROUP BY `t2`.`timestamp`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCountEvalTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCountEvalTest.java index 91d9a8de09d..833f54e5619 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCountEvalTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLCountEvalTest.java @@ -65,7 +65,7 @@ public void testCountEvalArithmeticExpression() { + " LogicalTableScan(table=[[scott, EMP]])\n") .expectResult("high_ratio=0\n") .expectSparkSQL( - "SELECT COUNT(CASE WHEN `DIVIDE`(`SAL`, `COMM`) > 10 THEN 1 ELSE NULL END)" + "SELECT COUNT(CASE WHEN DIVIDE(`SAL`, `COMM`) > 10 THEN 1 ELSE NULL END)" + " `high_ratio`\n" + "FROM `scott`.`EMP`"); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDateTimeFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDateTimeFunctionTest.java index ec107dcd948..a7f2cdb9893 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDateTimeFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDateTimeFunctionTest.java @@ -23,7 +23,7 @@ public void testDateAndCurrentTimestamp() { "LogicalProject(added=[DATE(NOW())])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedSparkSql = "SELECT `DATE`(`NOW`()) `added`\nFROM `scott`.`EMP`"; + String expectedSparkSql = "SELECT DATE(NOW()) `added`\nFROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java 
index ea0194d68cf..70b53d3c6fc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEvalTest.java @@ -135,7 +135,7 @@ public void testEvalAvg() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `DIVIDE`(10 + (20 + 30), 3.0E0) `average`\n" + "FROM `scott`.`EMP`"; + "SELECT `EMPNO`, DIVIDE(10 + (20 + 30), 3.0E0) `average`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -149,7 +149,7 @@ public void testEvalAvgWithFields() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `DIVIDE`(`SAL` + `COMM`, 2.0E0) `avgSal`\n" + "FROM `scott`.`EMP`"; + "SELECT `EMPNO`, DIVIDE(`SAL` + `COMM`, 2.0E0) `avgSal`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -523,13 +523,13 @@ public void testEvalMaxOnNumbers() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], a=[MAX(5, 30, $7)])\n" + + " COMM=[$6], DEPTNO=[$7], a=[SCALAR_MAX(5, 30, $7)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(5, 30," - + " `DEPTNO`) `a`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, SCALAR_MAX(5," + + " 30, `DEPTNO`) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -540,13 +540,13 @@ public void testEvalMaxOnStrings() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], a=[MAX('banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " COMM=[$6], DEPTNO=[$7], a=[SCALAR_MAX('banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX('banana'," - + " 'Door', `ENAME`) `a`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " SCALAR_MAX('banana', 'Door', `ENAME`) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -557,13 +557,14 @@ public void testEvalMaxOnNumericAndString() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], a=[MAX(5, 30, $7, 'banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " COMM=[$6], DEPTNO=[$7], a=[SCALAR_MAX(5, 30, $7, 'banana':VARCHAR, 'Door':VARCHAR," + + " $1)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(5, 30," - + " `DEPTNO`, 'banana', 'Door', `ENAME`) `a`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, SCALAR_MAX(5," + + " 30, `DEPTNO`, 'banana', 'Door', `ENAME`) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -574,13 +575,13 @@ public void testEvalMinOnNumbers() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], a=[MIN(5, 30, $7)])\n" + + " 
COMM=[$6], DEPTNO=[$7], a=[SCALAR_MIN(5, 30, $7)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MIN(5, 30," - + " `DEPTNO`) `a`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, SCALAR_MIN(5," + + " 30, `DEPTNO`) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -591,13 +592,13 @@ public void testEvalMinOnStrings() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], a=[MIN('banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " COMM=[$6], DEPTNO=[$7], a=[SCALAR_MIN('banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MIN('banana'," - + " 'Door', `ENAME`) `a`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " SCALAR_MIN('banana', 'Door', `ENAME`) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -608,13 +609,14 @@ public void testEvalMinOnNumericAndString() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], a=[MIN(5, 30, $7, 'banana':VARCHAR, 'Door':VARCHAR, $1)])\n" + + " COMM=[$6], DEPTNO=[$7], a=[SCALAR_MIN(5, 30, $7, 'banana':VARCHAR, 'Door':VARCHAR," + + " $1)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MIN(5, 30," - + " `DEPTNO`, 'banana', 'Door', `ENAME`) `a`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, SCALAR_MIN(5," + + " 30, `DEPTNO`, 'banana', 'Door', `ENAME`) `a`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGrokTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGrokTest.java index 376a47d0c9a..b31ca6008ad 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGrokTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGrokTest.java @@ -25,7 +25,7 @@ public void testGrok() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `GROK`(`ENAME`, '.+@%{HOSTNAME:host}', 'grok')['host'] `host`\n" + "SELECT `ENAME`, GROK(`ENAME`, '.+@%{HOSTNAME:host}', 'grok')['host'] `host`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -41,7 +41,7 @@ public void testGrokOverriding() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `GROK`(`ENAME`, '%{NUMBER} %{GREEDYDATA:ENAME}', 'grok')['ENAME'] `ENAME`\n" + "SELECT GROK(`ENAME`, '%{NUMBER} %{GREEDYDATA:ENAME}', 'grok')['ENAME'] `ENAME`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMathFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMathFunctionTest.java index fff01427201..58e29c49a28 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMathFunctionTest.java +++ 
b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMathFunctionTest.java @@ -145,7 +145,7 @@ public void testEuler() { String expectedLogical = "LogicalProject(EULER=[E()])\n LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); - String expectedSparkSql = "SELECT `E`() `EULER`\nFROM `scott`.`EMP`"; + String expectedSparkSql = "SELECT E() `EULER`\nFROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java index 8746fe846e5..7185c85aa1d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMultisearchTest.java @@ -301,11 +301,11 @@ public void testMultisearchWithTimestampFiltering() { + "FROM (SELECT `timestamp`, `value`, `category`, `@timestamp`\n" + "FROM (SELECT *\n" + "FROM `scott`.`TIME_DATA1`\n" - + "WHERE `@timestamp` > `TIMESTAMP`('2025-07-31 23:00:00')\n" + + "WHERE `@timestamp` > TIMESTAMP('2025-07-31 23:00:00')\n" + "UNION ALL\n" + "SELECT *\n" + "FROM `scott`.`TIME_DATA2`\n" - + "WHERE `@timestamp` > `TIMESTAMP`('2025-07-31 23:00:00'))\n" + + "WHERE `@timestamp` > TIMESTAMP('2025-07-31 23:00:00'))\n" + "ORDER BY `@timestamp` DESC NULLS FIRST) `t2`\n" + "ORDER BY `@timestamp` DESC"; verifyPPLToSparkSQL(root, expectedSparkSql); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLParseTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLParseTest.java index 6b613f346bb..ad95cd21b34 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLParseTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLParseTest.java @@ -27,7 +27,7 @@ public void testParse() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `JOB`, `PARSE`(`DATE_FORMAT`(`HIREDATE`, '%Y-%m-%d')," + "SELECT `JOB`, PARSE(DATE_FORMAT(`HIREDATE`, '%Y-%m-%d')," + " '(?\\d{4})-\\d{2}-\\d{2}', 'regex')['year'] `year`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -46,7 +46,7 @@ public void testParseOverriding() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `JOB`, `PARSE`(`DATE_FORMAT`(`HIREDATE`, '%Y-%m-%d')," + "SELECT `JOB`, PARSE(DATE_FORMAT(`HIREDATE`, '%Y-%m-%d')," + " '(?\\d{4})-\\d{2}-\\d{2}', 'regex')['MGR'] `MGR`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java index f1dfd930a82..3d3bb5b6a1b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java @@ -64,9 +64,9 @@ public void testPatternsLabelMode_ShowNumberedToken_ForSimplePatternMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['pattern'] AS" - + " STRING) `patterns_field`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR" + + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" + " 
`ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END," + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -91,9 +91,9 @@ public void testPatternsLabelModeWithCustomPattern_ShowNumberedToken_ForSimplePa verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['pattern'] AS STRING)" - + " `patterns_field`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + + " `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['tokens'] AS" + " MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -138,9 +138,9 @@ public void testPatternsLabelModeWithPartitionBy_ShowNumberedToken_SimplePattern verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME`" + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME`" + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END," - + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(`PATTERN_PARSER`(CASE" + + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE" + " WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," + " '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" + " `tokens`\n" @@ -160,7 +160,7 @@ public void testPatternsLabelMode_NotShowNumberedToken_ForBrainMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, FALSE)" + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(`ENAME`, `pattern`(`ENAME`, 10, 100000, FALSE)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), FALSE)['pattern']" + " AS STRING) `patterns_field`\n" + "FROM `scott`.`EMP`"; @@ -183,9 +183,9 @@ public void testPatternsLabelMode_ShowNumberedToken_ForBrainMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE)" + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE)" + " OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), TRUE)['pattern']" - + " AS STRING) `patterns_field`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`," + + " AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(`ENAME`, `pattern`(`ENAME`," + " 10, 100000, TRUE) OVER (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + " TRUE)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -206,7 +206,7 @@ public void testPatternsLabelModeWithPartitionBy_NotShowNumberedToken_ForBrainMe verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10," + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(`ENAME`, `pattern`(`ENAME`, 10," + " 100000, FALSE) OVER (PARTITION BY `DEPTNO` RANGE BETWEEN UNBOUNDED PRECEDING AND" + " UNBOUNDED FOLLOWING), FALSE)['pattern'] AS STRING) `patterns_field`\n" + "FROM `scott`.`EMP`"; @@ -229,10 +229,10 @@ public void 
testPatternsLabelModeWithPartitionBy_ShowNumberedToken_ForBrainMetho verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10," + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(`ENAME`, `pattern`(`ENAME`, 10," + " 100000, TRUE) OVER (PARTITION BY `DEPTNO` RANGE BETWEEN UNBOUNDED PRECEDING AND" + " UNBOUNDED FOLLOWING), TRUE)['pattern'] AS STRING) `patterns_field`," - + " TRY_CAST(`PATTERN_PARSER`(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE) OVER" + + " TRY_CAST(PATTERN_PARSER(`ENAME`, `pattern`(`ENAME`, 10, 100000, TRUE) OVER" + " (PARTITION BY `DEPTNO` RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)," + " TRUE)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -281,11 +281,11 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForSimplePatternMethod verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + "SELECT TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['pattern']" + " AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END) `pattern_count`," - + " TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + + " TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['tokens']" + " AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" @@ -312,12 +312,12 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `DEPTNO`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" - + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + "SELECT `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''" + + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)" - + " `pattern_count`, TRY_CAST(`PATTERN_PARSER`(CASE WHEN `ENAME` IS NULL OR `ENAME` =" - + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''" + + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + " 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10)" + " `sample_logs`\n" + "FROM `scott`.`EMP`\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java index 4ec76823bfe..619cb26b64a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java @@ -33,7 +33,7 @@ public void testRexBasicFieldExtraction() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT`(`ENAME`, '(?[A-Z]).*', 'first') `first`\n" + "SELECT `ENAME`, 
REX_EXTRACT(`ENAME`, '(?[A-Z]).*', 'first') `first`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -50,8 +50,8 @@ public void testRexMultipleNamedGroups() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT`(`ENAME`, '(?[A-Z])(?.*)', 'first') `first`," - + " `REX_EXTRACT`(`ENAME`, '(?[A-Z])(?.*)', 'rest') `rest`\n" + "SELECT `ENAME`, REX_EXTRACT(`ENAME`, '(?[A-Z])(?.*)', 'first') `first`," + + " REX_EXTRACT(`ENAME`, '(?[A-Z])(?.*)', 'rest') `rest`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -68,7 +68,7 @@ public void testRexWithMaxMatch() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?[A-Z])', 'letter', 3) `letter`\n" + "SELECT `ENAME`, REX_EXTRACT_MULTI(`ENAME`, '(?[A-Z])', 'letter', 3) `letter`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -86,8 +86,8 @@ public void testRexChainedCommands() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `JOB`, `REX_EXTRACT`(`ENAME`, '(?^.)', 'firstinitial')" - + " `firstinitial`, `REX_EXTRACT`(`JOB`, '(?\\w+)', 'jobtype') `jobtype`\n" + "SELECT `ENAME`, `JOB`, REX_EXTRACT(`ENAME`, '(?^.)', 'firstinitial')" + + " `firstinitial`, REX_EXTRACT(`JOB`, '(?\\w+)', 'jobtype') `jobtype`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -106,7 +106,7 @@ public void testRexWithWhereClause() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT`(`ENAME`, '(?[A-Z]).*', 'first') `first`, `SAL`\n" + "SELECT `ENAME`, REX_EXTRACT(`ENAME`, '(?[A-Z]).*', 'first') `first`, `SAL`\n" + "FROM `scott`.`EMP`\n" + "WHERE `SAL` > 1000"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -124,9 +124,9 @@ public void testRexWithAggregation() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT COUNT(*) `count()`, `REX_EXTRACT`(`JOB`, '(?\\w+)', 'jobtype') `jobtype`\n" + "SELECT COUNT(*) `count()`, REX_EXTRACT(`JOB`, '(?\\w+)', 'jobtype') `jobtype`\n" + "FROM `scott`.`EMP`\n" - + "GROUP BY `REX_EXTRACT`(`JOB`, '(?\\w+)', 'jobtype')"; + + "GROUP BY REX_EXTRACT(`JOB`, '(?\\w+)', 'jobtype')"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -144,8 +144,8 @@ public void testRexComplexPattern() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT`(`ENAME`, '(?[A-Z]{2})(?[A-Z]+)', 'prefix')" - + " `prefix`, `REX_EXTRACT`(`ENAME`, '(?[A-Z]{2})(?[A-Z]+)', 'suffix')" + "SELECT `ENAME`, REX_EXTRACT(`ENAME`, '(?[A-Z]{2})(?[A-Z]+)', 'prefix')" + + " `prefix`, REX_EXTRACT(`ENAME`, '(?[A-Z]{2})(?[A-Z]+)', 'suffix')" + " `suffix`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -165,7 +165,7 @@ public void testRexWithSort() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT`(`ENAME`, '(?^.)', 'firstletter')" + "SELECT `ENAME`, REX_EXTRACT(`ENAME`, '(?^.)', 'firstletter')" + " `firstletter`\n" + "FROM `scott`.`EMP`\n" + "ORDER BY 2\n" @@ -186,7 +186,7 @@ public void testRexWithMaxMatchZero() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?[A-Z])', 'letter', 10) `letter`\n" + "SELECT `ENAME`, REX_EXTRACT_MULTI(`ENAME`, '(?[A-Z])', 'letter', 10) `letter`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -211,7 +211,7 @@ public void 
testRexWithMaxMatchWithinLimit() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?[A-Z])', 'letter', 5) `letter`\n" + "SELECT `ENAME`, REX_EXTRACT_MULTI(`ENAME`, '(?[A-Z])', 'letter', 5) `letter`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -229,7 +229,7 @@ public void testRexWithMaxMatchAtLimit() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?[A-Z])', 'letter', 10) `letter`\n" + "SELECT `ENAME`, REX_EXTRACT_MULTI(`ENAME`, '(?[A-Z])', 'letter', 10) `letter`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -261,8 +261,8 @@ public void testRexWithOffsetField() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT`(`ENAME`, '(?[A-Z]).*', 'first') `first`," - + " `REX_OFFSET`(`ENAME`, '(?[A-Z]).*') `offsets`\n" + "SELECT `ENAME`, REX_EXTRACT(`ENAME`, '(?[A-Z]).*', 'first') `first`," + + " REX_OFFSET(`ENAME`, '(?[A-Z]).*') `offsets`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -281,9 +281,9 @@ public void testRexWithMultipleNamedGroupsAndOffsetField() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT`(`ENAME`, '(?[A-Z])(?.*)', 'first') `first`," - + " `REX_EXTRACT`(`ENAME`, '(?[A-Z])(?.*)', 'rest') `rest`," - + " `REX_OFFSET`(`ENAME`, '(?[A-Z])(?.*)') `positions`\n" + "SELECT `ENAME`, REX_EXTRACT(`ENAME`, '(?[A-Z])(?.*)', 'first') `first`," + + " REX_EXTRACT(`ENAME`, '(?[A-Z])(?.*)', 'rest') `rest`," + + " REX_OFFSET(`ENAME`, '(?[A-Z])(?.*)') `positions`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -301,8 +301,8 @@ public void testRexWithMaxMatchAndOffsetField() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `REX_EXTRACT_MULTI`(`ENAME`, '(?[A-Z])', 'letter', 3) `letter`," - + " `REX_OFFSET`(`ENAME`, '(?[A-Z])') `positions`\n" + "SELECT `ENAME`, REX_EXTRACT_MULTI(`ENAME`, '(?[A-Z])', 'letter', 3) `letter`," + + " REX_OFFSET(`ENAME`, '(?[A-Z])') `positions`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSearchTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSearchTest.java index ddb1df95edd..ce5bed94415 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSearchTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSearchTest.java @@ -50,7 +50,7 @@ public void testSearchWithFilter() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT *\nFROM `scott`.`EMP`\nWHERE `query_string`(MAP ('query', 'DEPTNO:20'))"; + "SELECT *\nFROM `scott`.`EMP`\nWHERE QUERY_STRING(MAP ('query', 'DEPTNO:20'))"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -78,7 +78,7 @@ public void testSearchWithAbsoluteTimeRange() { String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`LOGS`\n" - + "WHERE `query_string`(MAP ('query', '(@timestamp:>=2020\\-10\\-11T00\\:00\\:00Z) AND" + + "WHERE QUERY_STRING(MAP ('query', '(@timestamp:>=2020\\-10\\-11T00\\:00\\:00Z) AND" + " (@timestamp:<=2025\\-01\\-01T00\\:00\\:00Z)'))"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java index a6de6e28036..57b11d83150 100644 
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLSpathTest.java @@ -27,7 +27,7 @@ public void testSimpleEval() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " `JSON_EXTRACT`(`ENAME`, 'src.path') `src.path`\n" + + " JSON_EXTRACT(`ENAME`, 'src.path') `src.path`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -42,7 +42,7 @@ public void testEvalWithOutput() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `JSON_EXTRACT`(`ENAME`, 'src.path') `custom`\n" + "FROM `scott`.`EMP`"; + "SELECT JSON_EXTRACT(`ENAME`, 'src.path') `custom`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java index ba2169bab6d..42edc924294 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java @@ -125,7 +125,7 @@ public void testToStringBin() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`(`SAL`, 'binary') `salary_binary`, `SAL`\nFROM `scott`.`EMP`"; + "SELECT `ENAME`, TOSTRING(`SAL`, 'binary') `salary_binary`, `SAL`\nFROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -156,7 +156,7 @@ public void testToStringHex() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`(`SAL`, 'hex') `salary_hex`, `SAL`\nFROM `scott`.`EMP`"; + "SELECT `ENAME`, TOSTRING(`SAL`, 'hex') `salary_hex`, `SAL`\nFROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -175,7 +175,7 @@ public void testToStringHexFromNumberAsString() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`('1600', 'hex') `salary_hex`\nFROM `scott`.`EMP`\nLIMIT 1"; + "SELECT `ENAME`, TOSTRING('1600', 'hex') `salary_hex`\nFROM `scott`.`EMP`\nLIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -195,7 +195,7 @@ public void testToStringCommaFromNumberAsString() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`('160040222', 'commas') `salary_comma`\n" + "SELECT `ENAME`, TOSTRING('160040222', 'commas') `salary_comma`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -217,7 +217,7 @@ public void testToStringBinaryFromNumberAsString() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`('160040222', 'binary') `salary_binary`\n" + "SELECT `ENAME`, TOSTRING('160040222', 'binary') `salary_binary`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -252,7 +252,7 @@ public void testToStringCommas() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`(`SAL`, 'commas') `salary_commas`, `SAL`\nFROM `scott`.`EMP`"; + "SELECT `ENAME`, TOSTRING(`SAL`, 'commas') `salary_commas`, `SAL`\nFROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -273,7 +273,7 @@ public void testToStringDuration() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `ENAME`, `TOSTRING`(6500, 'duration') `duration_commas`\n" + "SELECT `ENAME`, TOSTRING(6500, 'duration') 
`duration_commas`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java index 33a9b84d23b..ca0ff70f0b7 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java @@ -75,10 +75,10 @@ public void testTimechartBasic() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, COUNT(*) `count()`\n" + "SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL\n" - + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" + + "GROUP BY SPAN(`@timestamp`, 1, 'm')\n" + "ORDER BY 1 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -87,14 +87,14 @@ public void testTimechartBasic() { public void testTimechartPerSecond() { withPPLQuery("source=events | timechart per_second(cpu_usage)") .expectSparkSQL( - "SELECT `@timestamp`, `DIVIDE`(`per_second(cpu_usage)` * 1.0000E3," + "SELECT `@timestamp`, DIVIDE(`per_second(cpu_usage)` * 1.0000E3," + " TIMESTAMPDIFF('MILLISECOND', `@timestamp`, TIMESTAMPADD('MINUTE', 1," + " `@timestamp`))) `per_second(cpu_usage)`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " `per_second(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" + + "GROUP BY SPAN(`@timestamp`, 1, 'm')\n" + "ORDER BY 1 NULLS LAST) `t3`"); } @@ -102,14 +102,14 @@ public void testTimechartPerSecond() { public void testTimechartPerMinute() { withPPLQuery("source=events | timechart per_minute(cpu_usage)") .expectSparkSQL( - "SELECT `@timestamp`, `DIVIDE`(`per_minute(cpu_usage)` * 6.00000E4," + "SELECT `@timestamp`, DIVIDE(`per_minute(cpu_usage)` * 6.00000E4," + " TIMESTAMPDIFF('MILLISECOND', `@timestamp`, TIMESTAMPADD('MINUTE', 1," + " `@timestamp`))) `per_minute(cpu_usage)`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " `per_minute(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" + + "GROUP BY SPAN(`@timestamp`, 1, 'm')\n" + "ORDER BY 1 NULLS LAST) `t3`"); } @@ -117,14 +117,14 @@ public void testTimechartPerMinute() { public void testTimechartPerHour() { withPPLQuery("source=events | timechart per_hour(cpu_usage)") .expectSparkSQL( - "SELECT `@timestamp`, `DIVIDE`(`per_hour(cpu_usage)` * 3.6000000E6," + "SELECT `@timestamp`, DIVIDE(`per_hour(cpu_usage)` * 3.6000000E6," + " TIMESTAMPDIFF('MILLISECOND', `@timestamp`, TIMESTAMPADD('MINUTE', 1," + " `@timestamp`))) `per_hour(cpu_usage)`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " `per_hour(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" + + "GROUP BY SPAN(`@timestamp`, 1, 'm')\n" + "ORDER BY 1 NULLS LAST) `t3`"); } @@ -132,14 +132,14 @@ public void 
testTimechartPerHour() { public void testTimechartPerDay() { withPPLQuery("source=events | timechart per_day(cpu_usage)") .expectSparkSQL( - "SELECT `@timestamp`, `DIVIDE`(`per_day(cpu_usage)` * 8.64E7," + "SELECT `@timestamp`, DIVIDE(`per_day(cpu_usage)` * 8.64E7," + " TIMESTAMPDIFF('MILLISECOND', `@timestamp`, TIMESTAMPADD('MINUTE', 1," + " `@timestamp`))) `per_day(cpu_usage)`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, SUM(`cpu_usage`)" + " `per_day(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `SPAN`(`@timestamp`, 1, 'm')\n" + + "GROUP BY SPAN(`@timestamp`, 1, 'm')\n" + "ORDER BY 1 NULLS LAST) `t3`"); } @@ -149,10 +149,10 @@ public void testTimechartWithSpan() { RelNode root = getRelNode(ppl); String expectedSparkSql = - "SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, COUNT(*) `count()`\n" + "SELECT SPAN(`@timestamp`, 1, 'h') `@timestamp`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL\n" - + "GROUP BY `SPAN`(`@timestamp`, 1, 'h')\n" + + "GROUP BY SPAN(`@timestamp`, 1, 'h')\n" + "ORDER BY 1 NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -166,16 +166,16 @@ public void testTimechartWithLimit() { "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END `host`," + " SUM(`t2`.`count()`) `count()`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `count()`\n" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t2`\n" + + "GROUP BY `host`, SPAN(`@timestamp`, 1, 'm')) `t2`\n" + "LEFT JOIN (SELECT `host`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER (ORDER" + " BY SUM(`count()`) DESC) `_row_number_chart_`\n" + "FROM (SELECT `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'm')) `t6`\n" + + "GROUP BY `host`, SPAN(`@timestamp`, 1, 'm')) `t6`\n" + "WHERE `host` IS NOT NULL\n" + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n" + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" @@ -192,16 +192,16 @@ public void testTimechartWithSpan1h() { "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`host` ELSE 'OTHER' END `host`," + " SUM(`t2`.`count()`) `count()`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `count()`\n" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'h') `@timestamp`, `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t2`\n" + + "GROUP BY `host`, SPAN(`@timestamp`, 1, 'h')) `t2`\n" + "LEFT JOIN (SELECT `host`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER (ORDER" + " BY SUM(`count()`) DESC) `_row_number_chart_`\n" + "FROM (SELECT `host`, COUNT(*) `count()`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t6`\n" + + "GROUP BY `host`, SPAN(`@timestamp`, 1, 'h')) `t6`\n" + "WHERE `host` IS NOT NULL\n" + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n" + "GROUP BY `t2`.`@timestamp`, CASE WHEN 
`t2`.`host` IS NULL THEN 'NULL' WHEN" @@ -218,17 +218,17 @@ public void testTimechartWithSpan1m() { "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`region` IS NULL THEN 'NULL' WHEN" + " `t9`.`_row_number_chart_` <= 10 THEN `t2`.`region` ELSE 'OTHER' END `region`," + " AVG(`t2`.`avg(cpu_usage)`) `avg(cpu_usage)`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'm') `@timestamp`, `region`, AVG(`cpu_usage`)" + " `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t2`\n" + + "GROUP BY `region`, SPAN(`@timestamp`, 1, 'm')) `t2`\n" + "LEFT JOIN (SELECT `region`, SUM(`avg(cpu_usage)`) `__grand_total__`, ROW_NUMBER()" + " OVER (ORDER BY SUM(`avg(cpu_usage)`) DESC) `_row_number_chart_`\n" + "FROM (SELECT `region`, AVG(`cpu_usage`) `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `region`, `SPAN`(`@timestamp`, 1, 'm')) `t6`\n" + + "GROUP BY `region`, SPAN(`@timestamp`, 1, 'm')) `t6`\n" + "WHERE `region` IS NOT NULL\n" + "GROUP BY `region`) `t9` ON `t2`.`region` = `t9`.`region`\n" + "GROUP BY `t2`.`@timestamp`, CASE WHEN `t2`.`region` IS NULL THEN 'NULL' WHEN" @@ -255,17 +255,17 @@ public void testTimechartWithLimitAndUseOtherFalse() { "SELECT `t2`.`@timestamp`, CASE WHEN `t2`.`host` IS NULL THEN 'NULL' WHEN" + " `t9`.`_row_number_chart_` <= 3 THEN `t2`.`host` ELSE 'OTHER' END `host`," + " AVG(`t2`.`avg(cpu_usage)`) `avg(cpu_usage)`\n" - + "FROM (SELECT `SPAN`(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" + + "FROM (SELECT SPAN(`@timestamp`, 1, 'h') `@timestamp`, `host`, AVG(`cpu_usage`)" + " `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t2`\n" + + "GROUP BY `host`, SPAN(`@timestamp`, 1, 'h')) `t2`\n" + "LEFT JOIN (SELECT `host`, SUM(`avg(cpu_usage)`) `__grand_total__`, ROW_NUMBER() OVER" + " (ORDER BY SUM(`avg(cpu_usage)`) DESC) `_row_number_chart_`\n" + "FROM (SELECT `host`, AVG(`cpu_usage`) `avg(cpu_usage)`\n" + "FROM `scott`.`events`\n" + "WHERE `@timestamp` IS NOT NULL AND `cpu_usage` IS NOT NULL\n" - + "GROUP BY `host`, `SPAN`(`@timestamp`, 1, 'h')) `t6`\n" + + "GROUP BY `host`, SPAN(`@timestamp`, 1, 'h')) `t6`\n" + "WHERE `host` IS NOT NULL\n" + "GROUP BY `host`) `t9` ON `t2`.`host` = `t9`.`host`\n" + "WHERE `t9`.`_row_number_chart_` <= 3\n" From 4766b93e6cf93aa5e1f7917ee6c30824248fa864 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 14 Nov 2025 10:30:03 +0800 Subject: [PATCH 60/99] Reduce RelCompositeTrait of RelCollation to a single collation when creating LogicalSystemLimit (#4758) Signed-off-by: Yuanchun Shen --- .../sql/calcite/plan/LogicalSystemLimit.java | 11 ++- .../rest-api-spec/test/issues/4644.yml | 88 +++++++++++++++++++ 2 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4644.yml diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java b/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java index c33854ebe52..6e46b63b976 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java @@ -65,16 +65,15 @@ private LogicalSystemLimit( } public 
static LogicalSystemLimit create(SystemLimitType type, RelNode input, RexNode fetch) { - return create(type, input, input.getTraitSet().getCollation(), null, fetch); + return create(type, input, null, fetch); } public static LogicalSystemLimit create( - SystemLimitType type, - RelNode input, - RelCollation collation, - @Nullable RexNode offset, - @Nullable RexNode fetch) { + SystemLimitType type, RelNode input, @Nullable RexNode offset, @Nullable RexNode fetch) { RelOptCluster cluster = input.getCluster(); + List<RelCollation> collations = input.getTraitSet().getTraits(RelCollationTraitDef.INSTANCE); + // When there exist multiple sets of equivalent collations, we arbitrarily select the first one + RelCollation collation = collations == null ? null : collations.get(0); collation = RelCollationTraitDef.INSTANCE.canonize(collation); RelTraitSet traitSet = input.getTraitSet().replace(Convention.NONE).replace(collation); return new LogicalSystemLimit(type, cluster, traitSet, input, collation, offset, fetch); } diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4644.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4644.yml new file mode 100644 index 00000000000..f04a1fb920d --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4644.yml @@ -0,0 +1,88 @@ +setup: + - do: + indices.create: + index: timechart-eval-bug + body: + mappings: + properties: + "@timestamp": + type: date + "user": + type: keyword + - do: + bulk: + index: timechart-eval-bug + refresh: true + body: + - '{"index":{}}' + - '{"@timestamp":"2024-05-01T00:15:00Z","user":"alice"}' + - '{"index":{}}' + - '{"@timestamp":"2024-05-01T00:30:00Z","user":"bob"}' + - '{"index":{}}' + - '{"@timestamp":"2024-05-02T00:45:00Z","user":"alice"}' + - '{"index":{}}' + - '{"@timestamp":"2024-05-03T00:20:00Z","user":"bob"}' + +--- +"timechart with eval on @timestamp field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=timechart-eval-bug | where HOUR(@timestamp) = 0 | timechart span=1d COUNT() by user | eval copyTimestamp = @timestamp + + - match: { total: 4 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "user", "type": "string" }, { "name": "COUNT()", "type": "bigint" }, { "name": "copyTimestamp", "type": "timestamp" }] } + - match: {"datarows": [["2024-05-01 00:00:00", "alice", 1, "2024-05-01 00:00:00"], ["2024-05-01 00:00:00", "bob", 1, "2024-05-01 00:00:00"], ["2024-05-02 00:00:00", "alice", 1, "2024-05-02 00:00:00"], ["2024-05-03 00:00:00", "bob", 1, "2024-05-03 00:00:00"]]} + +--- +"timechart with eval on by field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=timechart-eval-bug | where HOUR(@timestamp) = 0 | timechart span=1d COUNT() by user | eval copyUser = user + + - match: { total: 4 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "user", "type": "string" }, { "name": "COUNT()", "type": "bigint" }, { "name": "copyUser", "type": "string" }] } + - match: {"datarows": [["2024-05-01 00:00:00", "alice", 1, "alice"], ["2024-05-01 00:00:00", "bob", 1, "bob"], ["2024-05-02 00:00:00", "alice", 1, "alice"], ["2024-05-03 00:00:00", "bob", 1, "bob"]]} + +--- +"timechart with eval on aggregated field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=timechart-eval-bug | where HOUR(@timestamp) = 0 | timechart span=1d
COUNT() by user | eval doubleCount = `COUNT()` * 2 + + - match: { total: 4 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "user", "type": "string" }, { "name": "COUNT()", "type": "bigint" }, { "name": "doubleCount", "type": "bigint" }] } + - match: {"datarows": [["2024-05-01 00:00:00", "alice", 1, 2], ["2024-05-01 00:00:00", "bob", 1, 2], ["2024-05-02 00:00:00", "alice", 1, 2], ["2024-05-03 00:00:00", "bob", 1, 2]]} + +--- +"timechart with DATE_FORMAT on @timestamp": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=timechart-eval-bug | where HOUR(@timestamp) = 0 | timechart span=1d COUNT() by user | eval formatted_date = DATE_FORMAT(@timestamp, 'yyyy-MM-dd') + + - match: { total: 4 } + - match: { "schema": [ { "name": "@timestamp", "type": "timestamp" }, { "name": "user", "type": "string" }, { "name": "COUNT()", "type": "bigint" }, { "name": "formatted_date", "type": "string" }] } + - match: {"datarows": [["2024-05-01 00:00:00", "alice", 1, "2024-05-01"], ["2024-05-01 00:00:00", "bob", 1, "2024-05-01"], ["2024-05-02 00:00:00", "alice", 1, "2024-05-02"], ["2024-05-03 00:00:00", "bob", 1, "2024-05-03"]]} From 9fabc44d60b007b775becddb4ca9a4029a312b56 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Fri, 14 Nov 2025 13:22:22 +0800 Subject: [PATCH 61/99] Fix UT failure and Linkchecker failure (#4809) --- docs/dev/intro-v3-architecture.md | 2 +- .../sql/ppl/calcite/CalcitePPLArrayFunctionTest.java | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/dev/intro-v3-architecture.md b/docs/dev/intro-v3-architecture.md index 16324caba39..fc5bf3237d1 100644 --- a/docs/dev/intro-v3-architecture.md +++ b/docs/dev/intro-v3-architecture.md @@ -98,7 +98,7 @@ In Linkedin, they created a internal Calcite repo to convert Pig Latin scripts i - Have an optimizer rule to optimize Pig group/cogroup into Aggregate operators - Implement other RelNode in Rel2Sql so that Pig Latin can be translated into SQL -This [work](https://issues.apache.org/jira/browse/CALCITE-3122) had contributed to Apache Calcite and named [Piglet](https://calcite.apache.org/javadocAggregate/org/apache/calcite/piglet/package-summary.html). It allows users to write queries in Pig Latin, and execute them using any applicable Calcite adapter. +This work was contributed to Apache Calcite \(CALCITE-3122\) and named [Piglet](https://calcite.apache.org/javadocAggregate/org/apache/calcite/piglet/package-summary.html). It allows users to write queries in Pig Latin, and execute them using any applicable Calcite adapter. Pig Latin leverages `RelBuilder` to be implemented as a third-party front-end language (dialect).
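The paragraph above describes how a front-end dialect is built on Calcite's `RelBuilder`. As a rough, minimal sketch (not part of this patch) of what that lowering looks like, the snippet below translates a hypothetical `source=EMP | where DEPTNO = 20 | fields ENAME` pipeline into relational algebra; the `SchemaPlus` exposing the `EMP` table is assumed to be supplied by the caller:

```java
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;

public final class DialectLoweringSketch {
  /** Lowers the hypothetical pipeline: source=EMP | where DEPTNO = 20 | fields ENAME. */
  public static RelNode lower(SchemaPlus schema) {
    // Assumption: the caller provides a schema that already registers the EMP table.
    FrameworkConfig config = Frameworks.newConfigBuilder().defaultSchema(schema).build();
    RelBuilder builder = RelBuilder.create(config);
    return builder
        .scan("EMP") // source=EMP
        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(20))) // where DEPTNO = 20
        .project(builder.field("ENAME")) // fields ENAME
        .build();
  }
}
```

Once a dialect is expressed this way, Calcite's optimizer rules and Rel2Sql translation apply to it unchanged, which is the property Piglet demonstrates for Pig Latin.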
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java index 4dec305d28c..bffa20175d5 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -127,7 +127,7 @@ public void testMvindexSingleElementPositive() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `array`('a', 'b', 'c')[1 + 1] `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + "SELECT ARRAY('a', 'b', 'c')[1 + 1] `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -152,7 +152,7 @@ public void testMvindexSingleElementNegative() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `array`('a', 'b', 'c')[ARRAY_LENGTH(`array`('a', 'b', 'c')) + -1 + 1]" + "SELECT ARRAY('a', 'b', 'c')[ARRAY_LENGTH(ARRAY('a', 'b', 'c')) + -1 + 1]" + " `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; @@ -179,7 +179,7 @@ public void testMvindexRangePositive() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT ARRAY_SLICE(`array`(1, 2, 3, 4, 5), 1, 3 - 1 + 1) `result`\n" + "SELECT ARRAY_SLICE(ARRAY(1, 2, 3, 4, 5), 1, 3 - 1 + 1) `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -207,8 +207,8 @@ public void testMvindexRangeNegative() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT ARRAY_SLICE(`array`(1, 2, 3, 4, 5), ARRAY_LENGTH(`array`(1, 2, 3, 4, 5)) + -3," - + " ARRAY_LENGTH(`array`(1, 2, 3, 4, 5)) + -1 - (ARRAY_LENGTH(`array`(1, 2, 3, 4, 5))" + "SELECT ARRAY_SLICE(ARRAY(1, 2, 3, 4, 5), ARRAY_LENGTH(ARRAY(1, 2, 3, 4, 5)) + -3," + + " ARRAY_LENGTH(ARRAY(1, 2, 3, 4, 5)) + -1 - (ARRAY_LENGTH(ARRAY(1, 2, 3, 4, 5))" + " + -3) + 1) `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; From 7cd911e2c41bad1e39198a037f7df811c9e8dd42 Mon Sep 17 00:00:00 2001 From: Kai Huang <105710027+ahkcs@users.noreply.github.com> Date: Fri, 14 Nov 2025 14:18:36 -0800 Subject: [PATCH 62/99] doc update (#4803) --- docs/user/ppl/functions/json.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst index 0114cf5ef9b..26db6a6da93 100644 --- a/docs/user/ppl/functions/json.rst +++ b/docs/user/ppl/functions/json.rst @@ -62,6 +62,32 @@ Example:: | json scalar string | "abc" | "abc" | +--------------------+---------------------------------+---------------------------------+ +JSON_VALID +---------- + +Description +>>>>>>>>>>> + +Version: 3.1.0 + +Limitation: Only works when plugins.calcite.enabled=true + +Usage: `json_valid(value)` Evaluates whether a string uses valid JSON syntax. Returns TRUE if valid, FALSE if invalid. NULL input returns NULL. 
+ +Argument type: STRING + +Return type: BOOLEAN + +Example:: + + os> source=people | eval is_valid_json = json_valid('[1,2,3,4]'), is_invalid_json = json_valid('{invalid}') | fields is_valid_json, is_invalid_json | head 1 + fetched rows / total rows = 1/1 + +---------------+-----------------+ + | is_valid_json | is_invalid_json | + |---------------+-----------------| + | True | False | + +---------------+-----------------+ + JSON_OBJECT ---------- From 09af1b2c0203a11d6bf26f623ef2c2dd9ff5aee2 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Tue, 18 Nov 2025 14:04:34 +0800 Subject: [PATCH 63/99] Pushdown sort by complex expressions to scan (#4750) * Pushdown sort expression to scan Signed-off-by: Songkan Tang * Refactor a bit of SortExprIndexScanRule Signed-off-by: Songkan Tang * Simplify some code and add more explain tests Signed-off-by: Songkan Tang * Support null ordering for sort expression pushdown Signed-off-by: Songkan Tang * Attempt to fix security tests and add more test cases Signed-off-by: Songkan Tang * Address comments Signed-off-by: Songkan Tang * Address more comments Signed-off-by: Songkan Tang --------- Signed-off-by: Songkan Tang --- .../sql/calcite/utils/PlanUtils.java | 20 + .../sql/calcite/remote/CalciteExplainIT.java | 74 ++++ .../org/opensearch/sql/ppl/SortCommandIT.java | 93 ++++ ...complex_sort_expr_no_expr_output_push.yaml | 9 + ...n_complex_sort_expr_project_then_sort.yaml | 9 + .../explain_complex_sort_expr_push.yaml | 10 + ...lex_sort_expr_single_expr_output_push.yaml | 10 + .../explain_complex_sort_nested_expr.yaml | 10 + .../explain_complex_sort_then_field_sort.yaml | 17 + .../explain_sort_complex_and_simple_expr.yaml | 10 + .../calcite/explain_sort_type_push.json | 2 +- ...complex_sort_expr_no_expr_output_push.yaml | 13 + ...n_complex_sort_expr_project_then_sort.yaml | 11 + .../explain_complex_sort_expr_push.yaml | 12 + ...lex_sort_expr_single_expr_output_push.yaml | 12 + .../explain_complex_sort_nested_expr.yaml | 12 + .../explain_complex_sort_then_field_sort.yaml | 15 + .../explain_sort_complex_and_simple_expr.yaml | 12 + .../ExpandCollationOnProjectExprRule.java | 145 ++++-- .../planner/rules/LimitIndexScanRule.java | 4 +- .../planner/rules/OpenSearchIndexRules.java | 5 +- .../planner/rules/SortExprIndexScanRule.java | 261 +++++++++++ .../opensearch/request/AggregateAnalyzer.java | 3 +- .../request/OpenSearchRequestBuilder.java | 13 +- .../opensearch/request/PredicateAnalyzer.java | 16 +- .../scan/AbstractCalciteIndexScan.java | 10 +- .../storage/scan/CalciteLogicalIndexScan.java | 108 ++++- .../storage/scan/context/PushDownContext.java | 22 +- .../storage/scan/context/PushDownType.java | 1 + .../storage/scan/context/SortExprDigest.java | 105 +++++ .../storage/script/CalciteScriptEngine.java | 6 + .../script/sort/CalciteNumberSortScript.java | 64 +++ .../sort/CalciteNumberSortScriptFactory.java | 38 ++ .../CalciteNumberSortScriptLeafFactory.java | 45 ++ .../script/sort/CalciteStringSortScript.java | 66 +++ .../sort/CalciteStringSortScriptFactory.java | 38 ++ .../CalciteStringSortScriptLeafFactory.java | 41 ++ .../opensearch/util/OpenSearchRelOptUtil.java | 108 +++++ .../util/OpenSearchRelOptUtilTest.java | 411 ++++++++++++++++++ 39 files changed, 1820 insertions(+), 41 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml create mode 100644 
integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_no_expr_output_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_project_then_sort.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_single_expr_output_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_nested_expr.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_then_field_sort.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sort_complex_and_simple_expr.yaml create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptFactory.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptFactory.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index ded7ba541a4..633472d958f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -68,6 +68,9 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; + String DIRECTION = "DIRECTION"; + String NULL_DIRECTION = "NULL_DIRECTION"; + static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { case MICROSECOND -> SpanUnit.MICROSECOND; @@ -532,6 +535,23 @@ static boolean sortByFieldsOnly(Sort sort) { return !sort.getCollation().getFieldCollations().isEmpty() && sort.fetch == null; } + /** + * Check if the sort collation references a non-field project expression.
+ * + * @param sort the sort operator adding sort order over the project + * @param project the project operation that may contain non-field expressions + * @return flag indicating whether a non-field project expression will be sorted + */ + static boolean sortReferencesExpr(Sort sort, Project project) { + if (sort.getCollation().getFieldCollations().isEmpty()) { + return false; + } + return sort.getCollation().getFieldCollations().stream() + .anyMatch( + relFieldCollation -> + project.getProjects().get(relFieldCollation.getFieldIndex()) instanceof RexCall); + } + /** * Get a string representation of the argument types expressed in ExprType for error messages. * diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 85e39dc062e..46a86686cb6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -872,6 +872,80 @@ public void testSimpleSortExpressionPushDownWithOnlyExprProjected() throws Excep assertJsonEqualsIgnoreId(expected, result); } + @Test + public void testComplexSortExpressionPushDownExplain() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2 | fields age," + " age2"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_push.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testComplexSortExpressionPushDownWithOnlyExprProjected() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2 | fields" + " age2"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_single_expr_output_push.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testComplexSortExpressionPushDownWithoutExprProjected() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2 | fields age"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_no_expr_output_push.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testComplexSortExpressionProjectThenSort() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | fields age, age2 | sort" + " age2"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_expr_project_then_sort.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + /* + * TODO: A potential optimization is to leverage RexSimplify to simplify -(+($10, $7), $10) to $7. + * The above simplification can only work when $10 is non-null and there is no precision loss in + * the expression calculation. + */ + @Test + public void testSortNestedComplexExpression() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance, age3 = age2 - age | sort" + " age3"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_nested_expr.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testSortComplexExpressionThenSortField() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance | sort age2, age |
eval" + + " balance2 = abs(balance) | sort age"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_complex_sort_then_field_sort.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testSortComplexExprMixedWithSimpleExpr() throws Exception { + String query = + "source=opensearch-sql_test_index_bank| eval age2 = age + balance, balance2 = balance + 1 |" + + " sort age2, balance2 "; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_sort_complex_and_simple_expr.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + @Test public void testRexExplain() throws IOException { String query = diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java index b760a9c5546..a9001f5c995 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java @@ -10,7 +10,9 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyOrder; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; import java.util.ArrayList; @@ -321,4 +323,95 @@ public void testHeadThenSort() throws IOException { verifyOrder(result, rows(28), rows(32)); } } + + @Test + public void testSortComplexExpression() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval age2 = age + balance | sort age2 | fields age, balance, age2", + TEST_INDEX_BANK)); + verifyOrder( + result, + rows(33, 4180, 4213), + rows(36, 5686, 5722), + rows(36, 16418, 16454), + rows(28, 32838, 32866), + rows(32, 39225, 39257), + rows(39, 40540, 40579), + rows(34, 48086, 48120)); + } + + @Test + public void testSortComplexExpressionThenHead() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval age2 = age + balance | sort age2 | fields age, balance, age2 |" + + " head 2", + TEST_INDEX_BANK)); + verifyOrder(result, rows(33, 4180, 4213), rows(36, 5686, 5722)); + } + + @Test + public void testPushdownSortStringExpression() throws IOException { + String ppl = + String.format( + "source=%s | eval firstname2 = substring(firstname, 1, 3) | sort firstname2 | fields" + + " firstname2, firstname", + TEST_INDEX_BANK_WITH_NULL_VALUES); + + JSONObject result = executeQuery(ppl); + verifySchema(result, schema("firstname2", "string"), schema("firstname", "string")); + verifyOrder( + result, + rows("Amb", "Amber JOHnny"), + rows("Dal", "Dale"), + rows("Dil", "Dillard"), + rows("Eli", "Elinor"), + rows("Hat", "Hattie"), + rows("Nan", "Nanette"), + rows("Vir", "Virginia")); + } + + @Test + public void testPushdownSortExpressionContainsNull() throws IOException { + String ppl = + String.format( + "source=%s | eval balance2 = abs(balance) | sort -balance2 | fields balance, balance2", + TEST_INDEX_BANK_WITH_NULL_VALUES); + + JSONObject result = executeQuery(ppl); + verifySchema(result, schema("balance", "bigint"), schema("balance2", "bigint")); + verifyOrder( + result, + rows(48086, 48086), + rows(39225, 39225), + rows(32838, 32838), + rows(4180, 4180), + rows(null, null), + rows(null, null), + rows(null, null)); + } + + @Test + public 
void testPushdownSortExpressionWithMixedFieldSort() throws IOException { + String ppl = + String.format( + "source=%s | eval balance2 = abs(balance) | sort -balance2, account_number | fields" + + " balance2, account_number", + TEST_INDEX_BANK_WITH_NULL_VALUES); + + JSONObject result = executeQuery(ppl); + verifySchema(result, schema("balance2", "bigint"), schema("account_number", "bigint")); + verifyOrder( + result, + rows(48086, 32), + rows(39225, 1), + rows(32838, 13), + rows(4180, 18), + rows(null, 6), + rows(null, 20), + rows(null, 25)); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml new file mode 100644 index 00000000000..6461c40061c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST], LIMIT->10000, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml new file mode 100644 index 00000000000..256c393b069 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], 
fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(age=[$10], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], $f1=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml new file mode 100644 index 00000000000..b509fb3117f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10], age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], $f1=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml new file mode 100644 index 00000000000..db832db7b4d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], $f0=[$t2]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml new file mode 100644 index 00000000000..b57328afbae --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$14], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], age3=[$20]) + LogicalSort(sort0=[$20], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], age3=[-(+($10, $7), $10)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[-($t13, $t10)], proj#0..14=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[-(+($10, $7), $10) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydAHbewogICJvcCI6IHsKICAgICJuYW1lIjogIi0iLAogICAgImtpbmQiOiAiTUlOVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICIrIiwKICAgICAgICAia2luZCI6ICJQTFVTIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDEsCiAgICAgICAgICAibmFtZSI6ICIkMSIKICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0sCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAnQAB2JhbGFuY2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HdAADYWdlfnEAfgAKdAAHSU5URUdFUnh4\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml new file mode 100644 index 00000000000..d9726a2beb1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_then_field_sort.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$10], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$10], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[$19], balance2=[ABS($7)]) + LogicalSort(sort0=[$19], sort1=[$10], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[ABS($t7)], proj#0..14=[{exprs}]) + 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT->[{ + "age" : { + "order" : "asc", + "missing" : "_first" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"age":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml new file mode 100644 index 00000000000..ee2254f5420 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$13], sort1=[$14], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$19], sort1=[$20], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], balance2=[+($7, 1)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[1], expr#15=[+($t7, $t14)], proj#0..13=[{exprs}], $f14=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST, balance ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}},{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json index 87801c949fc..054bc203640 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalSort(sort0=[$17], dir0=[ASC-nulls-first])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], $f17=[SAFE_CAST($8)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..1=[{inputs}], age=[$t0])\n EnumerableLimit(fetch=[10000])\n EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[SAFE_CAST($t0)], proj#0..1=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT_EXPR->[SAFE_CAST($0) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAensKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQA3XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTQUZFX0NBU1QiLAogICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXSwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"NULL_DIRECTION\":\"FIRST\",\"DIRECTION\":\"ASCENDING\",\"utcTimestamp\":*}},\"type\":\"number\",\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_no_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_no_expr_output_push.yaml new file mode 100644 index 00000000000..5c479c6867e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_no_expr_output_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], age=[$t0]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], age=[$t10], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_project_then_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_project_then_sort.yaml new file mode 100644 index 00000000000..a95c277b40e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_project_then_sort.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$1], dir0=[ASC-nulls-first]) + LogicalProject(age=[$10], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, 
$t7)], age=[$t10], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_push.yaml new file mode 100644 index 00000000000..ef4ea5fc43e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_push.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$1], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age=[$10], age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], age=[$t10], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_single_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_single_expr_output_push.yaml new file mode 100644 index 00000000000..7df4a4d7f4e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_expr_single_expr_output_push.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(age2=[$19]) + LogicalSort(sort0=[$19], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_nested_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_nested_expr.yaml new file mode 100644 index 00000000000..711608264eb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_nested_expr.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$14], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], age3=[$20]) + LogicalSort(sort0=[$20], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], 
firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], age3=[-(+($10, $7), $10)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$14], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], expr#20=[-($t19, $t10)], proj#0..12=[{exprs}], age2=[$t19], age3=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_then_field_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_then_field_sort.yaml new file mode 100644 index 00000000000..362f847ae6e --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_complex_sort_then_field_sort.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$10], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$10], dir0=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[$19], balance2=[ABS($7)]) + LogicalSort(sort0=[$19], sort1=[$10], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[ABS($t7)], proj#0..14=[{exprs}]) + EnumerableSort(sort0=[$10], dir0=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], proj#0..12=[{exprs}], age2=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sort_complex_and_simple_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sort_complex_and_simple_expr.yaml new file mode 100644 index 00000000000..873a778f979 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sort_complex_and_simple_expr.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$13], sort1=[$14], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], age2=[$19], balance2=[$20]) + LogicalSort(sort0=[$19], sort1=[$20], dir0=[ASC-nulls-first], 
dir1=[ASC-nulls-first]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)], balance2=[+($7, 1)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$13], sort1=[$14], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[+($t10, $t7)], expr#20=[1], expr#21=[+($t7, $t20)], proj#0..12=[{exprs}], age2=[$t19], balance2=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java index 204ecacbd39..a09be09a34b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java @@ -5,22 +5,27 @@ package org.opensearch.sql.opensearch.planner.rules; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; +import java.util.function.Predicate; import org.apache.calcite.adapter.enumerable.EnumerableProject; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; import org.apache.calcite.plan.RelTrait; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.plan.volcano.AbstractConverter; +import org.apache.calcite.plan.volcano.RelSubset; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelFieldCollation; -import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Project; import org.apache.commons.lang3.tuple.Pair; import org.immutables.value.Value; import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan; import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; /** @@ -29,6 +34,9 @@ * takes effect, the input collation is changed to a sort over field instead of original sort over * expression. It changes the collation requirement of the whole query. * + *

Another problem is that, if a sort expression is pushed down to the scan, the EnumerableProject + * doesn't know the collation is already satisfied. + * *

AbstractConverter physical node is supposed to resolve the problem of inconsistent collation * requirement between physical node input and output. This optimization rule finds equivalent * output expression collations and input field collations. If their collation traits are satisfied, @@ -48,52 +56,136 @@ public void onMatch(RelOptRuleCall call) { final Project project = call.rel(1); final RelTraitSet toTraits = converter.getTraitSet(); final RelCollation toCollation = toTraits.getTrait(RelCollationTraitDef.INSTANCE); - final RelTrait fromTrait = - project.getInput().getTraitSet().getTrait(RelCollationTraitDef.INSTANCE); + + assert toCollation != null && toCollation.getFieldCollations() != null + : "Output field collations should not be null"; + + Map>> orderEquivInfoMap = new HashMap<>(); + for (RelFieldCollation relFieldCollation : toCollation.getFieldCollations()) { + orderEquivInfoMap.put( + relFieldCollation.getFieldIndex(), + OpenSearchRelOptUtil.getOrderEquivalentInputInfo( + project.getProjects().get(relFieldCollation.getFieldIndex()))); + } + + // Branch 1: Check if complex expressions are already sorted by scan and assign collation + if (handleComplexExpressionsSortedByScan( + call, project, toTraits, toCollation, orderEquivInfoMap)) { + return; + } + + // Branch 2: Handle simple expressions that can be transformed to field sorts + handleSimpleExpressionFieldSorts(call, project, toTraits, toCollation, orderEquivInfoMap); + } + + /** + * Handle the case where complex expressions are already sorted by the scan. In this case, we can + * directly assign toTrait to the new EnumerableProject. + * + * @return true if handled, false if not applicable + */ + private boolean handleComplexExpressionsSortedByScan( + RelOptRuleCall call, + Project project, + RelTraitSet toTraits, + RelCollation toCollation, + Map>> orderEquivInfoMap) { + + // Check if toCollation is null or not a simple RelCollation with field collations + if (toCollation == null || toCollation.getFieldCollations().isEmpty()) { + return false; + } + + // Extract the actual enumerable scan from the input, handling RelSubset case + CalciteEnumerableIndexScan scan = extractEnumerableScanFromInput(project.getInput()); + if (scan == null) { + return false; + } + + // Check if the scan can provide the required sort collation + if (OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, toCollation, orderEquivInfoMap)) { + // The scan has already provided the sorting for complex expressions + // We can directly assign toTrait to new EnumerableProject + Project newProject = + project.copy(toTraits, project.getInput(), project.getProjects(), project.getRowType()); + call.transformTo(newProject); + return true; + } + return false; + } + + /** + * Handle simple expressions that can be transformed to field sorts using + * getOrderEquivalentInputInfo. + */ + private void handleSimpleExpressionFieldSorts( + RelOptRuleCall call, + Project project, + RelTraitSet toTraits, + RelCollation toCollation, + Map>> orderEquivInfoMap) { + + RelTrait fromTrait = project.getInput().getTraitSet().getTrait(RelCollationTraitDef.INSTANCE); + // In case of fromTrait is an instance of RelCompositeTrait, it most likely finds equivalence by // default. // Let it go through default ExpandConversionRule to determine trait satisfaction. 
- if (fromTrait != null && fromTrait instanceof RelCollation) { + if (fromTrait instanceof RelCollation) { RelCollation fromCollation = (RelCollation) fromTrait; // TODO: Handle the case where multi expr collations are mapped to the same source field if (toCollation == null || toCollation.getFieldCollations().isEmpty() - || fromCollation == null || fromCollation.getFieldCollations().size() < toCollation.getFieldCollations().size()) { return; } for (int i = 0; i < toCollation.getFieldCollations().size(); i++) { - RelFieldCollation targetFieldCollation = toCollation.getFieldCollations().get(i); - Optional> equivalentCollationInputInfo = - OpenSearchRelOptUtil.getOrderEquivalentInputInfo( - project.getProjects().get(targetFieldCollation.getFieldIndex())); - - if (equivalentCollationInputInfo.isEmpty()) { - return; - } - - RelFieldCollation sourceFieldCollation = fromCollation.getFieldCollations().get(i); - int equivalentSourceIndex = equivalentCollationInputInfo.get().getLeft(); - Direction equivalentSourceDirection = - equivalentCollationInputInfo.get().getRight() - ? targetFieldCollation.getDirection().reverse() - : targetFieldCollation.getDirection(); - if (!(equivalentSourceIndex == sourceFieldCollation.getFieldIndex() - && equivalentSourceDirection == sourceFieldCollation.getDirection())) { + RelFieldCollation toCollationFieldCollation = toCollation.getFieldCollations().get(i); + if (!OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + fromCollation.getFieldCollations().get(i), + toCollationFieldCollation, + orderEquivInfoMap.get(toCollationFieldCollation.getFieldIndex()))) { return; } } // After collation equivalence analysis, fromTrait satisfies toTrait. Copy the target trait - // set - // to new EnumerableProject. + // set to new EnumerableProject. Project newProject = project.copy(toTraits, project.getInput(), project.getProjects(), project.getRowType()); call.transformTo(newProject); } } + /** + * Extract CalciteEnumerableIndexScan from the input RelNode, handling RelSubset case. Since this + * rule matches EnumerableProject, we expect CalciteEnumerableIndexScan during physical + * optimization. 
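+ * + * During Volcano physical optimization the project input is typically a RelSubset rather than the scan itself, so the scan is recovered from the subset's current best plan; a direct CalciteEnumerableIndexScan input is the base case of the recursion.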
+ * + * @param input The input RelNode to extract scan from + * @return CalciteEnumerableIndexScan if found, null otherwise + */ + private static CalciteEnumerableIndexScan extractEnumerableScanFromInput(RelNode input) { + + // Case 1: Direct CalciteEnumerableIndexScan (physical scan) + if (input instanceof CalciteEnumerableIndexScan) { + return (CalciteEnumerableIndexScan) input; + } + + // Case 2: RelSubset with best plan being a CalciteEnumerableIndexScan + if (input instanceof RelSubset) { + RelSubset subset = (RelSubset) input; + RelNode bestPlan = subset.getBest(); + if (bestPlan != null) { + // Recursively check the best plan + return extractEnumerableScanFromInput(bestPlan); + } + } + + return null; + } + @Value.Immutable public interface Config extends OpenSearchRuleConfig { @@ -110,8 +202,9 @@ public interface Config extends OpenSearchRuleConfig { .oneInput( b1 -> b1.operand(EnumerableProject.class) - .predicate(PlanUtils::projectContainsExpr) - .predicate(p -> !p.containsOver()) + .predicate( + Predicate.not(Project::containsOver) + .and(PlanUtils::projectContainsExpr)) .anyInputs())); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java index 5d5412ce86d..ce99431fa8b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java @@ -50,7 +50,7 @@ public void onMatch(RelOptRuleCall call) { } } - private static Integer extractLimitValue(RexNode fetch) { + public static Integer extractLimitValue(RexNode fetch) { // fetch is always a integer literal (specified in our PPL/SQL syntax) if (fetch instanceof RexLiteral) { return ((RexLiteral) fetch).getValueAs(Integer.class); @@ -71,7 +71,7 @@ private static Integer extractLimitValue(RexNode fetch) { * @param offset The RexNode representing the offset. * @return The extracted offset value, or null if it cannot be determined. 
*/ - private static Integer extractOffsetValue(RexNode offset) { + public static Integer extractOffsetValue(RexNode offset) { if (Objects.isNull(offset)) { return 0; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index c7f007bbf49..42262097333 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -35,6 +35,8 @@ public class OpenSearchIndexRules { SortAggregateMeasureRule.Config.DEFAULT.toRule(); private static final RareTopPushdownRule RARE_TOP_PUSH_DOWN = RareTopPushdownRule.Config.DEFAULT.toRule(); + private static final SortExprIndexScanRule SORT_EXPR_INDEX_SCAN = + SortExprIndexScanRule.Config.DEFAULT.toRule(); // Rule that always pushes down relevance functions regardless of pushdown settings public static final RelevanceFunctionPushdownRule RELEVANCE_FUNCTION_PUSHDOWN = @@ -54,7 +56,8 @@ public class OpenSearchIndexRules { SORT_PROJECT_EXPR_TRANSPOSE, SORT_AGGREGATION_METRICS_RULE, RARE_TOP_PUSH_DOWN, - EXPAND_COLLATION_ON_PROJECT_EXPR); + EXPAND_COLLATION_ON_PROJECT_EXPR, + SORT_EXPR_INDEX_SCAN); // prevent instantiation private OpenSearchIndexRules() {} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java new file mode 100644 index 00000000000..557eb3ce46e --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java @@ -0,0 +1,261 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.planner.rules; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Predicate; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.immutables.value.Value; +import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; +import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; +import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; + +/** + * Rule to match sort-project-scan RelNode pattern and push down sort expressions to OpenSearch. + * This rule identifies sort operations with complex expressions and attempts to push them down to + * the OpenSearch level for better performance. 
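+ * + * For example (using the bank test index from the expected plans above), a query such as source=bank | eval age2 = age + balance | sort age2 produces LogicalSort(LogicalProject(CalciteLogicalIndexScan)) with the sort key age2=[+($10, $7)]; that is the pattern this rule matches and rewrites into a script-based sort on the scan.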
+ */ +@Value.Enclosing +public class SortExprIndexScanRule extends RelRule<SortExprIndexScanRule.Config> { + + protected SortExprIndexScanRule(SortExprIndexScanRule.Config config) { + super(config); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final LogicalSort sort = call.rel(0); + final LogicalProject project = call.rel(1); + final CalciteLogicalIndexScan scan = call.rel(2); + + // Only match sort - project - scan when any sort key references an expression + if (!PlanUtils.sortReferencesExpr(sort, project)) { + return; + } + + boolean allSimpleExprs = true; + Map<Integer, Optional<Pair<Integer, Boolean>>> orderEquivInfoMap = new HashMap<>(); + + for (RelFieldCollation relFieldCollation : sort.getCollation().getFieldCollations()) { + Optional<Pair<Integer, Boolean>> orderEquivInfo = + OpenSearchRelOptUtil.getOrderEquivalentInputInfo( + project.getProjects().get(relFieldCollation.getFieldIndex())); + orderEquivInfoMap.put(relFieldCollation.getFieldIndex(), orderEquivInfo); + if (allSimpleExprs && orderEquivInfo.isEmpty()) { + allSimpleExprs = false; + } + } + + if (allSimpleExprs) { + return; + } + + boolean scanProvidesRequiredCollation = + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, sort.collation, orderEquivInfoMap); + if (scan.isTopKPushed() && !scanProvidesRequiredCollation) { + return; + } + + // Extract sort expressions with collation information from the sort node + List<SortExprDigest> sortExprDigests = + extractSortExpressionInfos(sort, project, scan, orderEquivInfoMap); + + // Check if any sort expressions can be pushed down + if (sortExprDigests.isEmpty() || !canPushDownSortExpressionInfos(sortExprDigests)) { + return; + } + + CalciteLogicalIndexScan newScan; + // If the scan's sort info already satisfies the new sort, just push down the limit if there is one + if (scan.isTopKPushed() && scanProvidesRequiredCollation) { + newScan = scan.copy(); + } else { + // Attempt to push down sort expressions + newScan = scan.pushdownSortExpr(sortExprDigests); + } + + Integer limitValue = LimitIndexScanRule.extractLimitValue(sort.fetch); + Integer offsetValue = LimitIndexScanRule.extractOffsetValue(sort.offset); + if (newScan != null && limitValue != null && offsetValue != null) { + newScan = (CalciteLogicalIndexScan) newScan.pushDownLimit(sort, limitValue, offsetValue); + } + + if (newScan != null) { + Project newProject = + project.copy(sort.getTraitSet(), newScan, project.getProjects(), project.getRowType()); + call.transformTo(newProject); + } + } + + /** + * Extract sort expressions with collation information from the sort node, mapping them through + * the project if necessary.
+ * + * @param sort The sort node + * @param project The project node + * @param scan The scan node to get stable field references + * @param orderEquivInfoMap Order equivalence info to determine if output expression collation can + * be optimized to field collation + * @return List of SortExprDigest with stable field references or complex expressions + */ + private List<SortExprDigest> extractSortExpressionInfos( + Sort sort, + Project project, + CalciteLogicalIndexScan scan, + Map<Integer, Optional<Pair<Integer, Boolean>>> orderEquivInfoMap) { + List<SortExprDigest> sortExprDigests = new ArrayList<>(); + + List<RexNode> sortKeys = sort.getSortExps(); + List<RelFieldCollation> collations = sort.getCollation().getFieldCollations(); + + for (int i = 0; i < sortKeys.size(); i++) { + RexNode sortKey = sortKeys.get(i); + RelFieldCollation collation = collations.get(i); + + SortExprDigest info = mapThroughProject(sortKey, project, scan, collation, orderEquivInfoMap); + + if (info != null) { + sortExprDigests.add(info); + } + } + + return sortExprDigests; + } + + /** + * Map a sort key through the project to create a SortExprDigest. For simple field references, + * stores the field name for stability. For complex expressions, stores the RexNode. + * + * @param sortKey The sort key (always a RexInputRef) + * @param project The project node + * @param scan The scan node to get field names from + * @param collation The collation information + * @param orderEquivInfoMap Order equivalence info to determine if output expression collation can + * be optimized to field collation + * @return SortExprDigest with stable field reference or complex expression + */ + private SortExprDigest mapThroughProject( + RexNode sortKey, + Project project, + CalciteLogicalIndexScan scan, + RelFieldCollation collation, + Map<Integer, Optional<Pair<Integer, Boolean>>> orderEquivInfoMap) { + assert sortKey instanceof RexInputRef : "sort key should always be a RexInputRef"; + + RexInputRef inputRef = (RexInputRef) sortKey; + RexNode projectExpression = project.getProjects().get(inputRef.getIndex()); + // Get the field name from the scan's row type + List<String> scanFieldNames = scan.getRowType().getFieldNames(); + + // If the project expression is a simple RexInputRef pointing to a scan field, + // or it can be optimized to sort by field, + // store the field name for stability + Optional<Pair<Integer, Boolean>> orderEquivalentInfo = + orderEquivInfoMap.get(collation.getFieldIndex()); + if (orderEquivalentInfo.isPresent()) { + Direction equivalentDirection = + orderEquivalentInfo.get().getRight() + ? collation.getDirection().reverse() + : collation.getDirection(); + // Create SortExprDigest with field name (stable reference) + return new SortExprDigest( + scanFieldNames.get(orderEquivalentInfo.get().getLeft()), + equivalentDirection, + collation.nullDirection); + } + + // For complex expressions, store the RexNode + return new SortExprDigest(projectExpression, collation.getDirection(), collation.nullDirection); + } + + /** + * Check if sort expressions can be pushed down to OpenSearch. Rejects literals and expressions + * that only contain literals. Only supports number and string types for sort scripts.
+ * + * @param sortExprDigests List of sort expression infos to check + * @return true if expressions can be pushed down, false otherwise + */ + private boolean canPushDownSortExpressionInfos(List sortExprDigests) { + for (SortExprDigest info : sortExprDigests) { + RexNode expr = info.getExpression(); + if (expr == null && StringUtils.isEmpty(info.getFieldName())) { + return false; + } else if (info.isSimpleFieldReference()) { + continue; + } + // Reject literals or constant expression - they don't provide meaningful sorting + if (expr instanceof RexLiteral + || RexUtil.isConstant(expr) + || !isSupportedSortScriptType(expr.getType().getSqlTypeName())) { + return false; + } + } + return true; + } + + /** + * Check if the SQL type is supported for OpenSearch sort scripts. Only number and string types + * are supported for sort script. + * + * @param sqlTypeName The SQL type name to check + * @return true if the type is supported for sort scripts, false otherwise + */ + private boolean isSupportedSortScriptType(SqlTypeName sqlTypeName) { + return SqlTypeName.CHAR_TYPES.contains(sqlTypeName) + || SqlTypeName.APPROX_TYPES.contains(sqlTypeName) + || SqlTypeName.INT_TYPES.contains(sqlTypeName); + } + + /** Rule configuration. */ + @Value.Immutable + public interface Config extends RelRule.Config { + SortExprIndexScanRule.Config DEFAULT = + ImmutableSortExprIndexScanRule.Config.builder() + .build() + .withOperandSupplier( + b0 -> + b0.operand(LogicalSort.class) + // Pure limit pushdown should be covered by SortProjectTransposeRule and + // OpenSearchLimitIndexScanRule + .predicate(sort -> !sort.collation.getFieldCollations().isEmpty()) + .oneInput( + b1 -> + b1.operand(LogicalProject.class) + .predicate(Predicate.not(Project::containsOver)) + .oneInput( + b2 -> + b2.operand(CalciteLogicalIndexScan.class) + .predicate( + AbstractCalciteIndexScan::noAggregatePushed) + .noInputs()))); + + @Override + default SortExprIndexScanRule toRule() { + return new SortExprIndexScanRule(this); + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index bd373c4b4c5..b4bf48bd880 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -151,7 +151,8 @@ else if (node instanceof RexInputRef ref) { .getReferenceForTermQuery()); } else if (node instanceof RexCall || node instanceof RexLiteral) { return scriptBuilder.apply( - (new PredicateAnalyzer.ScriptQueryExpression(node, rowType, fieldTypes, cluster)) + (new PredicateAnalyzer.ScriptQueryExpression( + node, rowType, fieldTypes, cluster, Collections.emptyMap())) .getScript()); } throw new IllegalStateException( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 3416515b08a..7a0a18c79ac 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -225,7 +225,18 @@ public void pushDownSort(List> sortBuilders) { } } - /** Pushdown size (limit) and from (offset) to DSL request. */ + /** + * Push down sort builder suppliers to DSL request. 
+ * + * @param sortBuilderSuppliers a mix of field sort builder suppliers and script sort builder + * suppliers + */ + public void pushDownSortSuppliers(List<Supplier<SortBuilder<?>>> sortBuilderSuppliers) { + for (Supplier<SortBuilder<?>> sortBuilderSupplier : sortBuilderSuppliers) { + sourceBuilder.sort(sortBuilderSupplier.get()); + } + } + public void pushDownLimit(Integer limit, Integer offset) { // If there are multiple limit, we take the minimum among them // E.g. for `source=t | head 10 | head 5`, we take 5 diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 59051ca6ef0..94134ae0b4b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -52,6 +52,7 @@ import java.util.Collections; import java.util.GregorianCalendar; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -222,7 +223,8 @@ public static QueryExpression analyzeExpression( throw new ExpressionNotAnalyzableException("Can't convert " + expression, e); } try { - return new ScriptQueryExpression(expression, rowType, fieldTypes, cluster); + return new ScriptQueryExpression( + expression, rowType, fieldTypes, cluster, Collections.emptyMap()); } catch (Throwable e2) { throw new ExpressionNotAnalyzableException("Can't convert " + expression, e2); } @@ -794,7 +796,8 @@ public Expression tryAnalyzeOperand(RexNode node) { return qe; } catch (PredicateAnalyzerException firstFailed) { try { - QueryExpression qe = new ScriptQueryExpression(node, rowType, fieldTypes, cluster); + QueryExpression qe = + new ScriptQueryExpression(node, rowType, fieldTypes, cluster, Collections.emptyMap()); if (!qe.isPartial()) { qe.updateAnalyzedNodes(node); } @@ -1448,12 +1451,14 @@ public static class ScriptQueryExpression extends QueryExpression { private RexNode analyzedNode; // use lambda to generate code lazily to avoid store generated code private final Supplier<String> codeGenerator; + private final Map<String, Object> params; public ScriptQueryExpression( RexNode rexNode, RelDataType rowType, Map<String, ExprType> fieldTypes, - RelOptCluster cluster) { + RelOptCluster cluster, + Map<String, Object> params) { // We prevent is_null(nested_field) from being pushed down because pushed-down scripts can not // access nested fields for the time being if (rexNode instanceof RexCall @@ -1467,6 +1472,7 @@ public ScriptQueryExpression( () -> SerializationWrapper.wrapWithLangType( ScriptEngineType.CALCITE, serializer.serialize(rexNode, rowType, fieldTypes)); + this.params = params; } @Override @@ -1480,12 +1486,14 @@ public Script getScript() { throw new UnsupportedScriptException( "ScriptQueryExpression requires a valid current time from hook, but it is not set"); } + Map<String, Object> mergedParams = new LinkedHashMap<>(params); + mergedParams.put(Variable.UTC_TIMESTAMP.camelName, currentTime); return new Script( DEFAULT_SCRIPT_TYPE, COMPOUNDED_LANG_NAME, codeGenerator.get(), Collections.emptyMap(), - Map.of(Variable.UTC_TIMESTAMP.camelName, currentTime)); + mergedParams); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 97ce0592c48..3f6b53b6c59 100644 ---
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -56,6 +56,7 @@ import org.opensearch.sql.opensearch.storage.scan.context.PushDownOperation; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.storage.scan.context.RareTopDigest; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; /** An abstract relational operator representing a scan of an OpenSearchIndex type. */ @Getter @@ -115,7 +116,7 @@ public double estimateRowCount(RelMetadataQuery mq) { (rowCount, operation) -> switch (operation.type()) { case AGGREGATION -> mq.getRowCount((RelNode) operation.digest()); - case PROJECT, SORT -> rowCount; + case PROJECT, SORT, SORT_EXPR -> rowCount; case SORT_AGG_METRICS -> NumberUtil.min( rowCount, osIndex.getBucketSize().doubleValue()); // Refer the org.apache.calcite.rel.metadata.RelMdRowCount @@ -166,6 +167,13 @@ public double estimateRowCount(RelMetadataQuery mq) { case SORT_AGG_METRICS -> { dRows = dRows * .9 / 10; // *.9 because always bucket IS_NOT_NULL dCpu += dRows; + } + case SORT_EXPR -> { + @SuppressWarnings("unchecked") + List sortKeys = (List) operation.digest(); + long complexExprCount = + sortKeys.stream().filter(digest -> digest.getExpression() != null).count(); + dCpu += NumberUtil.multiply(dRows, 1.1 * complexExprCount); } // Refer the org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(Aggregate rel,...) case COLLAPSE -> { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 3d01c81aeae..073a4f1f29f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -6,9 +6,12 @@ package org.opensearch.sql.opensearch.storage.scan; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; import java.util.stream.Collectors; import lombok.Getter; import org.apache.calcite.plan.Convention; @@ -21,6 +24,7 @@ import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; @@ -31,15 +35,22 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.sort.ScriptSortBuilder.ScriptSortType; +import org.opensearch.search.sort.SortBuilder; +import 
org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -48,6 +59,7 @@ import org.opensearch.sql.opensearch.planner.rules.EnumerableIndexScanRule; import org.opensearch.sql.opensearch.planner.rules.OpenSearchIndexRules; import org.opensearch.sql.opensearch.request.AggregateAnalyzer; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder.PushDownUnSupportedException; import org.opensearch.sql.opensearch.request.PredicateAnalyzer; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.QueryExpression; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -61,6 +73,7 @@ import org.opensearch.sql.opensearch.storage.scan.context.PushDownContext; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.storage.scan.context.RareTopDigest; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; /** The logical relational operator representing a scan of an OpenSearchIndex type. */ @Getter @@ -124,7 +137,7 @@ public CalciteLogicalIndexScan copyWithNewTraitSet(RelTraitSet traitSet) { public void register(RelOptPlanner planner) { super.register(planner); planner.addRule(EnumerableIndexScanRule.DEFAULT_CONFIG.toRule()); - if (osIndex.getSettings().getSettingValue(Settings.Key.CALCITE_PUSHDOWN_ENABLED)) { + if ((Boolean) osIndex.getSettings().getSettingValue(Settings.Key.CALCITE_PUSHDOWN_ENABLED)) { // When pushdown is enabled, use normal rules (they handle everything including relevance // functions) for (RelOptRule rule : OpenSearchIndexRules.OPEN_SEARCH_INDEX_SCAN_RULES) { @@ -253,7 +266,7 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { newSchema, pushDownContext.clone()); - AbstractAction action; + AbstractAction action; if (pushDownContext.isAggregatePushed()) { // For aggregate, we do nothing on query builder but only change the schema of the scan. action = (AggregationBuilderAction) aggAction -> {}; @@ -425,4 +438,95 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of } return null; } + + /** + * Push down sort expressions to OpenSearch level. Supports mixed RexCall and field sort + * expressions. + * + * @param sortExprDigests List of SortExprDigest with expressions and collation information + * @return CalciteLogicalIndexScan with sort expressions pushed down, or null if pushdown fails + */ + public CalciteLogicalIndexScan pushdownSortExpr(List sortExprDigests) { + try { + if (sortExprDigests == null || sortExprDigests.isEmpty()) { + return null; + } + + CalciteLogicalIndexScan newScan = + new CalciteLogicalIndexScan( + getCluster(), + traitSet, + hints, + table, + osIndex, + getRowType(), + pushDownContext.cloneWithoutSort()); + + List>> sortBuilderSuppliers = new ArrayList<>(); + for (SortExprDigest digest : sortExprDigests) { + SortOrder order = + Direction.DESCENDING.equals(digest.getDirection()) ? 
SortOrder.DESC : SortOrder.ASC; + + if (digest.isSimpleFieldReference()) { + String missing = + switch (digest.getNullDirection()) { + case FIRST -> "_first"; + case LAST -> "_last"; + default -> null; + }; + sortBuilderSuppliers.add( + () -> SortBuilders.fieldSort(digest.getFieldName()).order(order).missing(missing)); + continue; + } + RexNode sortExpr = digest.getExpression(); + assert sortExpr instanceof RexCall : "sort expression should be RexCall"; + Map directionParams = new LinkedHashMap<>(); + directionParams.put(PlanUtils.NULL_DIRECTION, digest.getNullDirection().name()); + directionParams.put(PlanUtils.DIRECTION, digest.getDirection().name()); + // Complex expression - use ScriptQueryExpression to generate script for sort + PredicateAnalyzer.ScriptQueryExpression scriptExpr = + new PredicateAnalyzer.ScriptQueryExpression( + digest.getExpression(), + rowType, + osIndex.getAllFieldTypes(), + getCluster(), + directionParams); + // Determine the correct ScriptSortType based on the expression's return type + ScriptSortType sortType = getScriptSortType(sortExpr.getType()); + + sortBuilderSuppliers.add( + () -> SortBuilders.scriptSort(scriptExpr.getScript(), sortType).order(order)); + } + + // Create action to push down sort expressions to OpenSearch + OSRequestBuilderAction action = + requestBuilder -> requestBuilder.pushDownSortSuppliers(sortBuilderSuppliers); + + newScan.pushDownContext.add(PushDownType.SORT_EXPR, sortExprDigests, action); + return newScan; + } catch (Exception e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Cannot pushdown sort expressions: {}", sortExprDigests, e); + } + } + return null; + } + + /** + * Determine the appropriate ScriptSortType based on the expression's return type. + * + * @param relDataType the return type of the expression + * @return the appropriate ScriptSortType + */ + private ScriptSortType getScriptSortType(RelDataType relDataType) { + if (SqlTypeName.CHAR_TYPES.contains(relDataType.getSqlTypeName())) { + return ScriptSortType.STRING; + } else if (SqlTypeName.INT_TYPES.contains(relDataType.getSqlTypeName()) + || SqlTypeName.APPROX_TYPES.contains(relDataType.getSqlTypeName())) { + return ScriptSortType.NUMBER; + } else { + throw new PushDownUnSupportedException( + "Unsupported type for sort expression pushdown: " + relDataType); + } + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 9098d1ca17c..4a2ade440cf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -30,6 +30,7 @@ public class PushDownContext extends AbstractCollection { private boolean isProjectPushed = false; private boolean isMeasureOrderPushed = false; private boolean isSortPushed = false; + private boolean isSortExprPushed = false; private boolean isTopKPushed = false; private boolean isRareTopPushed = false; @@ -53,7 +54,7 @@ public PushDownContext clone() { public PushDownContext cloneWithoutSort() { PushDownContext newContext = new PushDownContext(osIndex); for (PushDownOperation action : this) { - if (action.type() != PushDownType.SORT) { + if (action.type() != PushDownType.SORT && action.type() != PushDownType.SORT_EXPR) { newContext.add(action); } } @@ -101,7 +102,7 @@ public boolean add(PushDownOperation operation) { } if (operation.type() == 
PushDownType.LIMIT) { isLimitPushed = true; - if (isSortPushed || isMeasureOrderPushed) { + if (isSortPushed || isMeasureOrderPushed || isSortExprPushed) { isTopKPushed = true; } } @@ -111,6 +112,9 @@ public boolean add(PushDownOperation operation) { if (operation.type() == PushDownType.SORT) { isSortPushed = true; } + if (operation.type() == PushDownType.SORT_EXPR) { + isSortExprPushed = true; + } if (operation.type() == PushDownType.SORT_AGG_METRICS) { isMeasureOrderPushed = true; } @@ -128,6 +132,20 @@ public boolean containsDigest(Object digest) { return this.stream().anyMatch(action -> action.digest().equals(digest)); } + /** + * Get the digest of the first operation of a specific type. + * + * @param type The PushDownType to get the digest for + * @return The digest object, or null if no operation of the specified type exists + */ + public Object getDigestByType(PushDownType type) { + return this.stream() + .filter(operation -> operation.type() == type) + .map(PushDownOperation::digest) + .findFirst() + .orElse(null); + } + public OpenSearchRequestBuilder createRequestBuilder() { OpenSearchRequestBuilder newRequestBuilder = osIndex.createRequestBuilder(); if (operationsForRequestBuilder != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java index ddb0a3d7e66..81927e9f8d6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java @@ -16,6 +16,7 @@ public enum PushDownType { COLLAPSE, SORT_AGG_METRICS, // convert composite aggregate to terms or multi-terms bucket aggregate RARE_TOP, // convert composite aggregate to nested aggregate + SORT_EXPR // HIGHLIGHT, // NESTED } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java new file mode 100644 index 00000000000..5b7fb1db320 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java @@ -0,0 +1,105 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan.context; + +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.Getter; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rex.RexNode; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; + +/** + * Information about a sort expression that has been pushed down to OpenSearch. Contains both the + * expression and its collation information. For simple field references, stores the field name for + * stability across schema changes. 
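+ * + * For example, a sort key that is a plain field reference yields a digest carrying the field name (e.g. "age"), while a key such as SAFE_CAST($0) yields a digest carrying the RexNode itself; both carry direction and null direction, rendered in plan digests as SAFE_CAST($0) ASCENDING NULLS_FIRST.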
+ */ +@Getter +@AllArgsConstructor +public class SortExprDigest { + /** The RexNode expression being sorted (nullable for simple field references) */ + private final RexNode expression; + + /** The field name for simple field references (nullable for complex expressions) */ + private final String fieldName; + + /** The collation information (direction, null handling) */ + private final RelFieldCollation.Direction direction; + + /** The null direction */ + private final RelFieldCollation.NullDirection nullDirection; + + /** + * Constructor for complex expressions. + * + * @param expression The RexNode expression + * @param direction Sort direction + * @param nullDirection Null handling direction + */ + public SortExprDigest( + RexNode expression, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection) { + this(expression, null, direction, nullDirection); + } + + /** + * Constructor for simple field references. + * + * @param fieldName The field name + * @param direction Sort direction + * @param nullDirection Null handling direction + */ + public SortExprDigest( + String fieldName, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection) { + this(null, fieldName, direction, nullDirection); + } + + /** + * Check if this is a simple field reference. + * + * @return true if this represents a simple field reference, false for complex expressions + */ + public boolean isSimpleFieldReference() { + return expression == null && !StringUtils.isEmpty(fieldName); + } + + /** + * Get the effective expression for this sort info. For simple field references, creates a + * RexInputRef based on the current scan schema. + * + * @param scan The scan to get the current schema from + * @return The RexNode expression to use for sorting + */ + public RexNode getEffectiveExpression(AbstractCalciteIndexScan scan) { + if (isSimpleFieldReference()) { + // Find the field index in the current scan schema + List currentFieldNames = scan.getRowType().getFieldNames(); + int fieldIndex = currentFieldNames.indexOf(fieldName); + if (fieldIndex >= 0) { + // Create a RexInputRef for this field + return scan.getCluster() + .getRexBuilder() + .makeInputRef(scan.getRowType().getFieldList().get(fieldIndex).getType(), fieldIndex); + } + // Field not found in current schema - this shouldn't happen in normal cases + return null; + } else { + // Complex expression - return as-is + return expression; + } + } + + @Override + public String toString() { + String sortTarget = isSimpleFieldReference() ? 
fieldName : expression.toString(); + return String.format( + "%s %s NULLS_%s", sortTarget, direction.toString(), nullDirection.toString()); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java index a03fb6268ba..1644d9d0509 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java @@ -77,8 +77,10 @@ import org.opensearch.index.fielddata.ScriptDocValues; import org.opensearch.script.AggregationScript; import org.opensearch.script.FilterScript; +import org.opensearch.script.NumberSortScript; import org.opensearch.script.ScriptContext; import org.opensearch.script.ScriptEngine; +import org.opensearch.script.StringSortScript; import org.opensearch.search.lookup.SourceLookup; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.data.model.ExprIpValue; @@ -88,6 +90,8 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.storage.script.aggregation.CalciteAggregationScriptFactory; import org.opensearch.sql.opensearch.storage.script.filter.CalciteFilterScriptFactory; +import org.opensearch.sql.opensearch.storage.script.sort.CalciteNumberSortScriptFactory; +import org.opensearch.sql.opensearch.storage.script.sort.CalciteStringSortScriptFactory; import org.opensearch.sql.opensearch.storage.serde.RelJsonSerializer; /** @@ -115,6 +119,8 @@ public CalciteScriptEngine(RelOptCluster relOptCluster) { BiFunction, RelDataType, Object>>() .put(FilterScript.CONTEXT, CalciteFilterScriptFactory::new) .put(AggregationScript.CONTEXT, CalciteAggregationScriptFactory::new) + .put(NumberSortScript.CONTEXT, CalciteNumberSortScriptFactory::new) + .put(StringSortScript.CONTEXT, CalciteStringSortScriptFactory::new) .build(); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java new file mode 100644 index 00000000000..f368ac184eb --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java @@ -0,0 +1,64 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelFieldCollation.NullDirection; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.NumberSortScript; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; +import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.script.core.CalciteScript; + +/** Calcite number sort script. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteNumberSortScript extends NumberSortScript { + + /** Calcite script. 
*/ + private final CalciteScript calciteScript; + + private final SourceLookup sourceLookup; + private final Direction direction; + private final NullDirection nullDirection; + + public CalciteNumberSortScript( + Function1 function, + SearchLookup lookup, + LeafReaderContext context, + Map params) { + super(params, lookup, context); + this.calciteScript = new CalciteScript(function, params); + // TODO: we'd better get source from the leafLookup of super once it's available + this.sourceLookup = lookup.getLeafSearchLookup(context).source(); + this.direction = + params.containsKey(PlanUtils.DIRECTION) + ? Direction.valueOf((String) params.get(PlanUtils.DIRECTION)) + : Direction.ASCENDING; + this.nullDirection = + params.containsKey(PlanUtils.NULL_DIRECTION) + ? NullDirection.valueOf((String) params.get(PlanUtils.NULL_DIRECTION)) + : NullDirection.FIRST; + } + + @Override + public double execute() { + Object value = calciteScript.execute(this.getDoc(), this.sourceLookup)[0]; + // There is a limitation here when the Double value is exactly theoretical min/max value. + // It can't distinguish the ordering between null and exact Double.NEGATIVE_INFINITY or + // Double.NaN. + if (value == null) { + boolean isAscending = direction == Direction.ASCENDING; + boolean isNullFirst = nullDirection == NullDirection.FIRST; + return isAscending == isNullFirst ? Double.NEGATIVE_INFINITY : Double.NaN; + } + return ((Number) value).doubleValue(); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptFactory.java new file mode 100644 index 00000000000..426fa5472b7 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptFactory.java @@ -0,0 +1,38 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.type.RelDataType; +import org.opensearch.script.NumberSortScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite number sort script factory that generates leaf factory. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteNumberSortScriptFactory implements NumberSortScript.Factory { + + /** Generated code of calcite to execute. 
*/ + private final Function1 function; + + public CalciteNumberSortScriptFactory( + Function1 function, RelDataType type) { + this.function = function; + } + + @Override + public boolean isResultDeterministic() { + // This implies the results are cacheable + return true; + } + + @Override + public NumberSortScript.LeafFactory newFactory(Map params, SearchLookup lookup) { + return new CalciteNumberSortScriptLeafFactory(function, params, lookup); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java new file mode 100644 index 00000000000..703a8946e43 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.io.IOException; +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.NumberSortScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite number sort script leaf factory that produces script executor for each leaf. */ +@EqualsAndHashCode(callSuper = false) +class CalciteNumberSortScriptLeafFactory implements NumberSortScript.LeafFactory { + + private final Function1 function; + + /** Parameters for the calcite script. */ + private final Map params; + + /** Document lookup that returns doc values. */ + private final SearchLookup lookup; + + public CalciteNumberSortScriptLeafFactory( + Function1 function, Map params, SearchLookup lookup) { + this.function = function; + this.params = params; + this.lookup = lookup; + } + + @Override + public NumberSortScript newInstance(LeafReaderContext context) throws IOException { + return new CalciteNumberSortScript(function, lookup, context, params); + } + + @Override + public boolean needs_score() { + return false; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java new file mode 100644 index 00000000000..8e73cc0da97 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java @@ -0,0 +1,66 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelFieldCollation.NullDirection; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.StringSortScript; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; +import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.opensearch.storage.script.core.CalciteScript; + +/** Calcite string sort script. 
*/
+@EqualsAndHashCode(callSuper = false)
+public class CalciteStringSortScript extends StringSortScript {
+
+ /** Calcite script. */
+ private final CalciteScript calciteScript;
+
+ private final SourceLookup sourceLookup;
+ private final Direction direction;
+ private final NullDirection nullDirection;
+
+ private static final String MAX_SENTINEL = "\uFFFF\uFFFF_NULL_PLACEHOLDER_";
+ private static final String MIN_SENTINEL = "\u0000\u0000_NULL_PLACEHOLDER_";
+
+ public CalciteStringSortScript(
+ Function1 function,
+ SearchLookup lookup,
+ LeafReaderContext context,
+ Map params) {
+ super(params, lookup, context);
+ this.calciteScript = new CalciteScript(function, params);
+ // TODO: we'd better get source from the leafLookup of super once it's available
+ this.sourceLookup = lookup.getLeafSearchLookup(context).source();
+ this.direction =
+ params.containsKey(PlanUtils.DIRECTION)
+ ? Direction.valueOf((String) params.get(PlanUtils.DIRECTION))
+ : Direction.ASCENDING;
+ this.nullDirection =
+ params.containsKey(PlanUtils.NULL_DIRECTION)
+ ? NullDirection.valueOf((String) params.get(PlanUtils.NULL_DIRECTION))
+ : NullDirection.FIRST;
+ }
+
+ @Override
+ public String execute() {
+ Object value = calciteScript.execute(this.getDoc(), this.sourceLookup)[0];
+ // There is a limitation here when the String value is larger or smaller than the sentinel
+ // values: the lexicographic ordering between null and such special strings cannot be
+ // guaranteed.
+ if (value == null) {
+ boolean isAscending = direction == Direction.ASCENDING;
+ boolean isNullFirst = nullDirection == NullDirection.FIRST;
+ return isAscending == isNullFirst ? MIN_SENTINEL : MAX_SENTINEL;
+ }
+ return value.toString();
+ }
+} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptFactory.java new file mode 100644 index 00000000000..053361724e7 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptFactory.java @@ -0,0 +1,38 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.opensearch.storage.script.sort;
+
+import java.util.Map;
+import lombok.EqualsAndHashCode;
+import org.apache.calcite.DataContext;
+import org.apache.calcite.linq4j.function.Function1;
+import org.apache.calcite.rel.type.RelDataType;
+import org.opensearch.script.StringSortScript;
+import org.opensearch.search.lookup.SearchLookup;
+
+/** Calcite string sort script factory that generates leaf factory. */
+@EqualsAndHashCode(callSuper = false)
+public class CalciteStringSortScriptFactory implements StringSortScript.Factory {
+
+ /** Generated code of calcite to execute. 
*/ + private final Function1 function; + + public CalciteStringSortScriptFactory( + Function1 function, RelDataType type) { + this.function = function; + } + + @Override + public boolean isResultDeterministic() { + // This implies the results are cacheable + return true; + } + + @Override + public StringSortScript.LeafFactory newFactory(Map params, SearchLookup lookup) { + return new CalciteStringSortScriptLeafFactory(function, params, lookup); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java new file mode 100644 index 00000000000..a95ee30d59d --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.sort; + +import java.io.IOException; +import java.util.Map; +import lombok.EqualsAndHashCode; +import org.apache.calcite.DataContext; +import org.apache.calcite.linq4j.function.Function1; +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.StringSortScript; +import org.opensearch.search.lookup.SearchLookup; + +/** Calcite string sort script leaf factory. */ +@EqualsAndHashCode(callSuper = false) +public class CalciteStringSortScriptLeafFactory implements StringSortScript.LeafFactory { + + /** Generated code of calcite to execute. */ + private final Function1 function; + + /** Script parameters. */ + private final Map params; + + /** Search lookup. */ + private final SearchLookup lookup; + + public CalciteStringSortScriptLeafFactory( + Function1 function, Map params, SearchLookup lookup) { + this.function = function; + this.params = params; + this.lookup = lookup; + } + + @Override + public StringSortScript newInstance(LeafReaderContext context) throws IOException { + return new CalciteStringSortScript(function, lookup, context, params); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java index ab3743aeaf4..90738a267ff 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java @@ -10,9 +10,14 @@ import java.util.BitSet; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; @@ -30,6 +35,9 @@ import org.apache.calcite.util.mapping.Mappings; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; +import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; +import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; @UtilityClass public class OpenSearchRelOptUtil 
{ @@ -136,6 +144,33 @@ public static Optional> getOrderEquivalentInputInfo(RexNo } }
+ /**
+ * Assuming the single project input is already sorted, this method evaluates whether the input
+ * field sort collation satisfies the simple project expression's output collation.
+ *
+ * @param sourceFieldCollation project input field collation
+ * @param targetFieldCollation simple project expression output collation
+ * @param orderEquivInfo equivalent order information that contains optional input index and
+ * reversed flag pair
+ * @return whether the single project input collation satisfies the project expression output
+ * collation
+ */
+ public boolean sourceCollationSatisfiesTargetCollation(
+ RelFieldCollation sourceFieldCollation,
+ RelFieldCollation targetFieldCollation,
+ Optional> orderEquivInfo) {
+ if (orderEquivInfo.isEmpty()) {
+ return false;
+ }
+
+ int equivalentSourceIndex = orderEquivInfo.get().getLeft();
+ Direction equivalentSourceDirection =
+ orderEquivInfo.get().getRight()
+ ? targetFieldCollation.getDirection().reverse()
+ : targetFieldCollation.getDirection();
+ return equivalentSourceIndex == sourceFieldCollation.getFieldIndex()
+ && equivalentSourceDirection == sourceFieldCollation.getDirection();
+ }
+ private static boolean isOrderPreservingCast(RelDataType src, RelDataType dst) { final SqlTypeName srcType = src.getSqlTypeName(); final SqlTypeName dstType = dst.getSqlTypeName(); @@ -274,4 +309,77 @@ private static String generateUniqueName(String baseName, Set usedNames) suffix++; } }
+
+ /**
+ * Check if the scan can provide the required sort collation by matching toCollation's mapped
+ * project RexNodes with sort expressions from PushDownContext.
+ *
+ * @param scan The scan RelNode to check
+ * @param project The project node to match expressions against
+ * @param toCollation The required collation to match
+ * @param orderEquivInfoMap Order equivalence info to determine if output expression collation can
+ * be optimized to field collation
+ * @return true if the scan can provide the required collation, false otherwise
+ */
+ public static boolean canScanProvideSortCollation(
+ AbstractCalciteIndexScan scan,
+ Project project,
+ RelCollation toCollation,
+ Map>> orderEquivInfoMap) {
+
+ // Check if the scan has sort expressions pushed down
+ if (scan.getPushDownContext().stream()
+ .noneMatch(operation -> operation.type() == PushDownType.SORT_EXPR)) {
+ return false;
+ }
+
+ // Get the sort expression infos from the pushdown context
+ @SuppressWarnings("unchecked")
+ List sortExprDigests =
+ (List) scan.getPushDownContext().getDigestByType(PushDownType.SORT_EXPR);
+ if (sortExprDigests.isEmpty()
+ || sortExprDigests.size() < toCollation.getFieldCollations().size()) {
+ return false;
+ }
+
+ for (int i = 0; i < toCollation.getFieldCollations().size(); i++) {
+ RelFieldCollation requiredFieldCollation = toCollation.getFieldCollations().get(i);
+ RexNode projectExpr = project.getProjects().get(requiredFieldCollation.getFieldIndex());
+ SortExprDigest scanSortInfo = sortExprDigests.get(i);
+ // Get the effective expression for comparison
+ RexNode scanSortExpression = scanSortInfo.getEffectiveExpression(scan);
+
+ // Check if the required project output matches the scan sort expression
+ if (scanSortExpression != null && scanSortExpression.equals(projectExpr)) {
+ // Check if the collation direction and null handling match
+ if (requiredFieldCollation.getDirection() == scanSortInfo.getDirection()
+ && requiredFieldCollation.nullDirection == scanSortInfo.getNullDirection()) {
+ // 
Direction and null handling match, so this required collation is satisfied
+ continue;
+ }
+ return false;
+ }
+
+ // Check if sorting simple RexCall is equivalent to field sort
+ if (scanSortExpression instanceof RexInputRef && projectExpr instanceof RexCall) {
+ RexInputRef scanInputRef = (RexInputRef) scanSortExpression;
+ RelFieldCollation sourceCollation =
+ new RelFieldCollation(
+ scanInputRef.getIndex(),
+ scanSortInfo.getDirection(),
+ scanSortInfo.getNullDirection());
+ if (sourceCollationSatisfiesTargetCollation(
+ sourceCollation,
+ requiredFieldCollation,
+ orderEquivInfoMap.get(requiredFieldCollation.getFieldIndex()))) {
+ continue;
+ }
+ }
+
+ return false;
+ }
+
+ // All required collations are matched
+ return true;
+ } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java index a9790d6485e..9a78bbc813e 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtilTest.java @@ -8,10 +8,21 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Optional;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.RelCollation;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelFieldCollation.Direction;
+import org.apache.calcite.rel.RelFieldCollation.NullDirection;
+import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystem;
@@ -26,6 +37,11 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
+import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan;
+import org.opensearch.sql.opensearch.storage.scan.context.PushDownContext;
+import org.opensearch.sql.opensearch.storage.scan.context.PushDownOperation;
+import org.opensearch.sql.opensearch.storage.scan.context.PushDownType;
+import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest;
@ExtendWith(MockitoExtension.class) public class OpenSearchRelOptUtilTest { @@ -325,4 +341,399 @@ public void testNoDots() { List result = OpenSearchRelOptUtil.resolveColumnNameConflicts(input); assertEquals(expected, result); }
+
+ @Test
+ public void testSourceCollationSatisfiesTargetCollation_DirectInputRef() {
+ // Source collation: col0 ASC
+ // Target collation: col0 ASC (output index 0)
+ Optional> orderEquivInfo = Optional.of(Pair.of(0, false));
+
+ RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING);
+ RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.ASCENDING);
+
+ assertTrue(
+ OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation(
+ sourceCollation, targetCollation, orderEquivInfo));
+ }
+
+ @Test
+ public void testSourceCollationSatisfiesTargetCollation_EmptyOrderEquivInfo() {
+ Optional> orderEquivInfo = Optional.empty();
+
+ RelFieldCollation sourceCollation = new 
RelFieldCollation(0, Direction.DESCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectInputRefDescending() { + // Source collation: col0 DESC + // Target collation: col0 DESC (output index 0) + Optional> orderEquivInfo = Optional.of(Pair.of(0, false)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.DESCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertTrue( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectionMismatch() { + // Source collation: col0 ASC + // Target collation: col0 DESC (output index 0) + Optional> orderEquivInfo = Optional.of(Pair.of(0, false)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_IndexMismatch() { + // Source collation: col0 ASC + // Target collation: col1 ASC (output index 1) + Optional> orderEquivInfo = Optional.of(Pair.of(1, false)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(1, Direction.ASCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectionFlipped() { + // Source collation: col0 ASC + // Target collation: -col0 DESC (output index 0) + Optional> orderEquivInfo = Optional.of(Pair.of(0, true)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.DESCENDING); + + assertTrue( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testSourceCollationSatisfiesTargetCollation_DirectionFlippedMismatched() { + // Source collation: col0 ASC + // Target collation: -col0 ASC (output index 0) - should be DESC + Optional> orderEquivInfo = Optional.of(Pair.of(0, true)); + + RelFieldCollation sourceCollation = new RelFieldCollation(0, Direction.ASCENDING); + RelFieldCollation targetCollation = new RelFieldCollation(0, Direction.ASCENDING); + + assertFalse( + OpenSearchRelOptUtil.sourceCollationSatisfiesTargetCollation( + sourceCollation, targetCollation, orderEquivInfo)); + } + + @Test + public void testCanScanProvideSortCollation_EmptySortExprDigests() { + Map>> orderEquivInfoMap = Collections.emptyMap(); + AbstractCalciteIndexScan scan = createMockScanWithSort(Collections.emptyList()); + Project project = createMockProject(Arrays.asList(rexBuilder.makeInputRef(inputType, 0))); + RelCollation collation = RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + 
public void testCanScanProvideSortCollation_InsufficientSortExprDigests() { + // Scan has 1 sort expression, but collation requires 2 + Map>> orderEquivInfoMap = Collections.emptyMap(); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = + createMockProject( + Arrays.asList( + rexBuilder.makeInputRef(inputType, 0), rexBuilder.makeInputRef(inputType, 1))); + RelCollation collation = + RelCollations.of( + new RelFieldCollation(0, Direction.ASCENDING), + new RelFieldCollation(1, Direction.ASCENDING)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ExactMatch() { + // Scan sorts by col0 ASC, project outputs col0, collation requires col0 ASC + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_DirectionMismatch() { + // Scan sorts by col0 ASC, but collation requires col0 DESC + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.DESCENDING, NullDirection.LAST)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_NullDirectionMismatch() { + // Scan sorts by col0 ASC NULLS LAST, but collation requires NULLS FIRST + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 0); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.FIRST)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ProjectTransformation() { + // Scan sorts by col0 ASC, project outputs -col0, collation requires -col0 DESC + Map>> orderEquivInfoMap = + 
Map.of(0, Optional.of(Pair.of(0, true))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = + rexBuilder.makeCall(SqlStdOperatorTable.UNARY_MINUS, rexBuilder.makeInputRef(inputType, 0)); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.DESCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ExpressionMismatch() { + // Scan sorts by col0, but project outputs col1 + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(1, false))); + RexNode scanExpr = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr = rexBuilder.makeInputRef(inputType, 1); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertFalse( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall() { + // Scan sorts by (col0 + col1) ASC, project outputs (col0 + col1), collation requires (col0 + + // col1) ASC + Map>> orderEquivInfoMap = Map.of(0, Optional.empty()); + RexNode scanExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeInputRef(inputType, 1)); + RexNode projectExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeInputRef(inputType, 1)); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall_DifferentExpression() { + // Scan sorts by (col0 + 10), but project outputs (col0 + 20) - should not match + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(10, inputType)); + RexNode projectExpr = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(20, inputType)); + SortExprDigest sortDigest = + new SortExprDigest(scanExpr, Direction.ASCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest)); + + Project project = createMockProject(Arrays.asList(projectExpr)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertFalse( + 
OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall_MixedSimpleAndComplex() { + // Scan sorts by col0 ASC, (col1 + 5) DESC + // Project outputs col0, (col1 + 5) + // Collation requires col0 ASC, (col1 + 5) DESC + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr0 = rexBuilder.makeInputRef(inputType, 0); + RexNode scanExpr1 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 1), + rexBuilder.makeLiteral(5, inputType)); + + RexNode projectExpr0 = rexBuilder.makeInputRef(inputType, 0); + RexNode projectExpr1 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 1), + rexBuilder.makeLiteral(5, inputType)); + + SortExprDigest sortDigest0 = + new SortExprDigest(scanExpr0, Direction.ASCENDING, NullDirection.LAST); + SortExprDigest sortDigest1 = + new SortExprDigest(scanExpr1, Direction.DESCENDING, NullDirection.FIRST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest0, sortDigest1)); + + Project project = createMockProject(Arrays.asList(projectExpr0, projectExpr1)); + RelCollation collation = + RelCollations.of( + new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST), + new RelFieldCollation(1, Direction.DESCENDING, NullDirection.FIRST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + @Test + public void testCanScanProvideSortCollation_ComplexRexCall_PartialMatch() { + // Scan sorts by (col0 + 10) ASC, col1 DESC + // Project outputs (col0 + 10), col1 + // Collation requires only (col0 + 10) ASC - should match (prefix match) + Map>> orderEquivInfoMap = + Map.of(0, Optional.of(Pair.of(0, false))); + RexNode scanExpr0 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(10, inputType)); + RexNode scanExpr1 = rexBuilder.makeInputRef(inputType, 1); + + RexNode projectExpr0 = + rexBuilder.makeCall( + SqlStdOperatorTable.PLUS, + rexBuilder.makeInputRef(inputType, 0), + rexBuilder.makeLiteral(10, inputType)); + RexNode projectExpr1 = rexBuilder.makeInputRef(inputType, 1); + + SortExprDigest sortDigest0 = + new SortExprDigest(scanExpr0, Direction.ASCENDING, NullDirection.LAST); + SortExprDigest sortDigest1 = + new SortExprDigest(scanExpr1, Direction.DESCENDING, NullDirection.LAST); + AbstractCalciteIndexScan scan = createMockScanWithSort(Arrays.asList(sortDigest0, sortDigest1)); + + Project project = createMockProject(Arrays.asList(projectExpr0, projectExpr1)); + RelCollation collation = + RelCollations.of(new RelFieldCollation(0, Direction.ASCENDING, NullDirection.LAST)); + + assertTrue( + OpenSearchRelOptUtil.canScanProvideSortCollation( + scan, project, collation, orderEquivInfoMap)); + } + + private Project createMockProject(List projects) { + Project project = mock(Project.class, org.mockito.Mockito.withSettings().lenient()); + when(project.getProjects()).thenReturn(projects); + return project; + } + + // Create mock scan with list of sortExprDigest + private AbstractCalciteIndexScan createMockScanWithSort(List sortDigests) { + AbstractCalciteIndexScan scan = + mock(AbstractCalciteIndexScan.class, org.mockito.Mockito.withSettings().lenient()); + PushDownContext context = + mock(PushDownContext.class, org.mockito.Mockito.withSettings().lenient()); + PushDownOperation sortOperation = + 
mock(PushDownOperation.class, org.mockito.Mockito.withSettings().lenient()); + + when(scan.getPushDownContext()).thenReturn(context); + when(context.stream()).thenReturn(Arrays.asList(sortOperation).stream()); + when(sortOperation.type()).thenReturn(PushDownType.SORT_EXPR); + when(context.getDigestByType(PushDownType.SORT_EXPR)).thenReturn(sortDigests); + + // Mock the cluster and RexBuilder for getEffectiveExpression + RelOptCluster cluster = mock(RelOptCluster.class, org.mockito.Mockito.withSettings().lenient()); + when(scan.getCluster()).thenReturn(cluster); + when(cluster.getRexBuilder()).thenReturn(rexBuilder); + + // Mock the row type + RelDataType rowType = + typeFactory.builder().add("col0", inputType).add("col1", inputType).build(); + when(scan.getRowType()).thenReturn(rowType); + + return scan; + } } From b45b9f58c1c18a7aa807bf2c6348d328c85dff28 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Tue, 18 Nov 2025 15:33:30 -0800 Subject: [PATCH 64/99] Support escaped field names in SPath parsing (#4813) --- .../org/opensearch/sql/ast/tree/SPath.java | 7 +++-- docs/category.json | 1 + docs/user/dql/metadata.rst | 3 ++- docs/user/ppl/cmd/spath.rst | 27 +++++++++++++++---- doctest/test_data/structured.json | 3 +++ doctest/test_docs.py | 1 + doctest/test_mapping/structured.json | 20 ++++++++++++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../sql/ppl/utils/SPathRewriteTest.java | 17 ++++++++++++ 9 files changed, 72 insertions(+), 8 deletions(-) create mode 100644 doctest/test_data/structured.json create mode 100644 doctest/test_mapping/structured.json diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java index 89eab6cf166..a1c0c08a15f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/SPath.java @@ -5,6 +5,8 @@ package org.opensearch.sql.ast.tree; +import static org.opensearch.sql.common.utils.StringUtils.unquoteText; + import com.google.common.collect.ImmutableList; import java.util.List; import lombok.AllArgsConstructor; @@ -48,8 +50,9 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { public Eval rewriteAsEval() { String outField = this.outField; + String unquotedPath = unquoteText(this.path); if (outField == null) { - outField = this.path; + outField = unquotedPath; } return AstDSL.eval( @@ -57,6 +60,6 @@ public Eval rewriteAsEval() { AstDSL.let( AstDSL.field(outField), AstDSL.function( - "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(this.path)))); + "json_extract", AstDSL.field(inField), AstDSL.stringLiteral(unquotedPath)))); } } diff --git a/docs/category.json b/docs/category.json index f126904da6a..f3fe70ecfa5 100644 --- a/docs/category.json +++ b/docs/category.json @@ -46,6 +46,7 @@ "user/ppl/cmd/search.rst", "user/ppl/cmd/showdatasources.rst", "user/ppl/cmd/sort.rst", + "user/ppl/cmd/spath.rst", "user/ppl/cmd/stats.rst", "user/ppl/cmd/streamstats.rst", "user/ppl/cmd/subquery.rst", diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index 7584c72505e..e959a69c8b6 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 22/22 + fetched rows / total rows = 23/23 +----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | 
TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -54,6 +54,7 @@ SQL query:: | docTestCluster | null | otellogs | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | state_country | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | structured | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_data | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_data2 | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | time_test | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/spath.rst b/docs/user/ppl/cmd/spath.rst index 7defb4437f2..85ba328c27b 100644 --- a/docs/user/ppl/cmd/spath.rst +++ b/docs/user/ppl/cmd/spath.rst @@ -37,10 +37,10 @@ The simplest spath is to extract a single field. This extracts `n` from the `doc PPL query:: - PPL> source=test_spath | spath input=doc n; + os> source=structured | spath input=doc_n n | fields doc_n n; fetched rows / total rows = 3/3 +----------+---+ - | doc | n | + | doc_n | n | |----------+---| | {"n": 1} | 1 | | {"n": 2} | 2 | @@ -54,10 +54,10 @@ These queries demonstrate more JSON path uses, like traversing nested fields and PPL query:: - PPL> source=test_spath | spath input=doc output=first_element list{0} | spath input=doc output=all_elements list{} | spath input=doc output=nested nest_out.nest_in; + os> source=structured | spath input=doc_list output=first_element list{0} | spath input=doc_list output=all_elements list{} | spath input=doc_list output=nested nest_out.nest_in | fields doc_list first_element all_elements nested; fetched rows / total rows = 3/3 +------------------------------------------------------+---------------+--------------+--------+ - | doc | first_element | all_elements | nested | + | doc_list | first_element | all_elements | nested | |------------------------------------------------------+---------------+--------------+--------| | {"list": [1, 2, 3, 4], "nest_out": {"nest_in": "a"}} | 1 | [1,2,3,4] | a | | {"list": [], "nest_out": {"nest_in": "a"}} | null | [] | a | @@ -71,10 +71,27 @@ The example shows extracting an inner field and doing statistics on it, using th PPL query:: - PPL> source=test_spath | spath input=doc n | eval n=cast(n as int) | stats sum(n); + os> source=structured | spath input=doc_n n | eval n=cast(n as int) | stats sum(n) | fields `sum(n)`; fetched rows / total rows = 1/1 +--------+ | sum(n) | |--------| | 6 | +--------+ + +Example 4: Escaped paths +============================ + +`spath` can escape paths with strings to accept any path that `json_extract` does. This includes escaping complex field names as array components. 
+ +PPL query:: + + os> source=structured | spath output=a input=doc_escape "['a fancy field name']" | spath output=b input=doc_escape "['a.b.c']" | fields a b; + fetched rows / total rows = 3/3 + +-------+---+ + | a | b | + |-------+---| + | true | 0 | + | true | 1 | + | false | 2 | + +-------+---+ diff --git a/doctest/test_data/structured.json b/doctest/test_data/structured.json new file mode 100644 index 00000000000..c0717c6f328 --- /dev/null +++ b/doctest/test_data/structured.json @@ -0,0 +1,3 @@ +{"doc_n":"{\"n\": 1}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 0}","doc_list":"{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "a"}} +{"doc_n":"{\"n\": 2}","doc_escape":"{\"a fancy field name\": true,\"a.b.c\": 1}","doc_list":"{\"list\": [], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "b"}} +{"doc_n":"{\"n\": 3}","doc_escape":"{\"a fancy field name\": false,\"a.b.c\": 2}","doc_list":"{\"list\": [5, 6], \"nest_out\": {\"nest_in\": \"a\"}}","obj_field":{"field": "c"}} \ No newline at end of file diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 4fd9c230ff6..d3cea5782b5 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -37,6 +37,7 @@ 'weblogs': 'weblogs.json', 'json_test': 'json_test.json', 'state_country': 'state_country.json', + 'structured': 'structured.json', 'occupation': 'occupation.json', 'worker': 'worker.json', 'work_information': 'work_information.json', diff --git a/doctest/test_mapping/structured.json b/doctest/test_mapping/structured.json new file mode 100644 index 00000000000..5c79e53dc0a --- /dev/null +++ b/doctest/test_mapping/structured.json @@ -0,0 +1,20 @@ +{ + "mappings": { + "properties": { + "doc_n": { + "type": "text" + }, + "doc_list": { + "type": "text" + }, + "doc_escape": { + "type": "text" + }, + "obj_field": { + "properties": { + "field": { "type": "text" } + } + } + } + } +} \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 70a9eff0fe3..8988ddf1491 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -404,6 +404,7 @@ spathParameter indexablePath : pathElement (DOT pathElement)* + | stringLiteral ; pathElement diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java index e97fb51ea90..73d282d1f64 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/SPathRewriteTest.java @@ -64,4 +64,21 @@ public void testSpathMissingPathArgumentHandling() { public void testSpathArgumentDeshuffle() { assertEquals(plan("source = t | spath path=a input=a"), plan("source = t | spath input=a a")); } + + @Test + public void testSpathEscapedParse() { + SPath sp = + (SPath) plan("source = t | spath input=f output=o path=\"attributes.['cluster.name']\""); + Eval ev = (Eval) plan("source = t | eval o=json_extract(f, \"attributes.['cluster.name']\")"); + + assertEquals(ev, sp.rewriteAsEval()); + } + + @Test + public void testSpathEscapedSpaces() { + SPath sp = (SPath) plan("source = t | spath input=f output=o path=\"['abc def ghi']\""); + Eval ev = (Eval) plan("source = t | eval o=json_extract(f, \"['abc def ghi']\")"); + + assertEquals(ev, sp.rewriteAsEval()); + } } From 0263d58cef99b08c2c630f34677d5aab4b3ec64c Mon Sep 17 00:00:00 2001 From: qianheng Date: Wed, 19 Nov 2025 13:59:38 +0800 Subject: 
[PATCH 65/99] Perform RexNode expression standardization for script push down. (#4795) * RexNode standardization Signed-off-by: Heng Qian * RexNode standardization 2 Signed-off-by: Heng Qian * RexNode standardization 3 Signed-off-by: Heng Qian * RexNode standardization 4 Signed-off-by: Heng Qian * RexNode standardization 5 Signed-off-by: Heng Qian * RexNode standardization 6 Signed-off-by: Heng Qian * RexNode standardization 7 Signed-off-by: Heng Qian * Refine code and add doc about script Signed-off-by: Heng Qian * Add intro-scripts.md Signed-off-by: Heng Qian * Fix IT Signed-off-by: Heng Qian * Refine code Signed-off-by: Heng Qian * Address comments Signed-off-by: Heng Qian * Address comments Signed-off-by: Heng Qian --------- Signed-off-by: Heng Qian --- DEVELOPER_GUIDE.rst | 1 + .../ExpressionScriptSerdeBenchmark.java | 5 +- .../sql/calcite/utils/PlanUtils.java | 3 - docs/dev/intro-scripts.md | 75 +++++++ .../sql/calcite/remote/CalciteExplainIT.java | 12 +- .../calcite/agg_case_cannot_push.yaml | 2 +- .../agg_case_composite_cannot_push.yaml | 2 +- .../calcite/agg_case_num_res_cannot_push.yaml | 2 +- .../calcite/clickbench/q19.yaml | 2 +- .../calcite/clickbench/q28.yaml | 2 +- .../calcite/clickbench/q29.yaml | 2 +- .../calcite/clickbench/q40.yaml | 2 +- .../calcite/clickbench/q43.yaml | 2 +- .../calcite/explain_agg_counts_by6.yaml | 2 +- .../explain_agg_script_timestamp_push.yaml | 2 +- .../explain_agg_script_udt_arg_push.yaml | 2 +- .../explain_agg_sort_on_measure_complex2.yaml | 2 +- ...gg_sort_on_measure_multi_terms_script.yaml | 2 +- .../explain_agg_sort_on_measure_script.yaml | 2 +- .../calcite/explain_agg_with_script.yaml | 2 +- .../explain_agg_with_sum_enhancement.json | 0 .../explain_agg_with_sum_enhancement.yaml | 2 +- ...complex_sort_expr_no_expr_output_push.yaml | 2 +- ...n_complex_sort_expr_project_then_sort.yaml | 2 +- .../explain_complex_sort_expr_push.yaml | 2 +- ...lex_sort_expr_single_expr_output_push.yaml | 2 +- .../explain_complex_sort_nested_expr.yaml | 2 +- .../calcite/explain_count_agg_push7.yaml | 2 +- .../calcite/explain_eval_min.yaml | 2 +- .../explain_filter_function_script_push.yaml | 2 +- .../explain_filter_script_ip_push.json | 6 - .../explain_filter_script_ip_push.yaml | 8 + .../calcite/explain_filter_script_push.yaml | 2 +- .../calcite/explain_isblank.yaml | 2 +- .../calcite/explain_isempty.yaml | 2 +- .../calcite/explain_isempty_or_others.yaml | 2 +- .../explain_min_max_agg_on_derived_field.yaml | 2 +- ...lain_patterns_simple_pattern_agg_push.yaml | 2 +- .../expectedOutput/calcite/explain_regex.yaml | 2 +- .../calcite/explain_regex_negated.yaml | 2 +- .../explain_regexp_match_in_where.json | 6 - .../explain_regexp_match_in_where.yaml | 8 + .../calcite/explain_script_push_on_text.yaml | 2 +- .../calcite/explain_skip_script_encoding.json | 4 +- .../explain_sort_complex_and_simple_expr.yaml | 2 +- .../calcite/explain_sort_type_push.json | 4 +- .../calcite/explain_text_like_function.yaml | 2 +- .../calcite/udf_geoip_in_agg_pushed.yaml | 2 +- .../calcite_no_pushdown/explain_eval_min.yaml | 2 +- .../explain_filter_script_ip_push.json | 6 - .../explain_filter_script_ip_push.yaml | 10 + .../ppl/explain_agg_with_sum_enhancement.json | 17 -- .../rest-api-spec/test/issues/3595.yml | 2 +- .../opensearch/functions/GeoIpFunction.java | 6 + .../opensearch/request/PredicateAnalyzer.java | 33 +-- .../scan/AbstractCalciteIndexScan.java | 7 +- .../scan/CalciteEnumerableIndexScan.java | 26 +-- .../storage/scan/CalciteLogicalIndexScan.java | 14 +- 
.../storage/scan/context/AbstractAction.java | 4 +- .../context/AggregationBuilderAction.java | 10 +- .../scan/context/OSRequestBuilderAction.java | 10 +- .../storage/scan/context/PushDownContext.java | 15 +- .../storage/scan/context/SortExprDigest.java | 6 + .../storage/script/CalciteScriptEngine.java | 156 +++++++------- .../aggregation/CalciteAggregationScript.java | 9 +- .../CalciteAggregationScriptLeafFactory.java | 17 +- .../storage/script/core/CalciteScript.java | 9 +- .../script/filter/CalciteFilterScript.java | 9 +- .../CalciteFilterScriptLeafFactory.java | 17 +- .../script/sort/CalciteNumberSortScript.java | 29 +-- .../CalciteNumberSortScriptLeafFactory.java | 17 +- .../script/sort/CalciteStringSortScript.java | 30 ++- .../CalciteStringSortScriptLeafFactory.java | 17 +- .../storage/serde/ExtendedRelJson.java | 13 +- .../serde/OpenSearchRelInputTranslator.java | 2 +- .../storage/serde/RelJsonSerializer.java | 78 ++----- .../storage/serde/RexStandardizer.java | 194 ++++++++++++++++++ .../storage/serde/ScriptParameterHelper.java | 86 ++++++++ .../opensearch/util/OpenSearchRelOptUtil.java | 33 --- .../request/AggregateAnalyzerTest.java | 4 +- .../storage/serde/ExtendedRelJsonTest.java | 7 +- .../storage/serde/RelJsonSerializerTest.java | 111 +++++++--- 82 files changed, 800 insertions(+), 410 deletions(-) create mode 100644 docs/dev/intro-scripts.md delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_agg_with_sum_enhancement.json create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ScriptParameterHelper.java diff --git a/DEVELOPER_GUIDE.rst b/DEVELOPER_GUIDE.rst index 7a1fac66d4a..92304c51606 100644 --- a/DEVELOPER_GUIDE.rst +++ b/DEVELOPER_GUIDE.rst @@ -318,6 +318,7 @@ For test cases, you can use the cases in the following checklist in case you mis - *Explain* - DSL for simple query + - Script for complex expressions, see details in `intro-scripts <./docs/dev/intro-scripts.md>`_. 
- Execution plan for complex query like JOIN - *Response format* diff --git a/benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/ExpressionScriptSerdeBenchmark.java b/benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/ExpressionScriptSerdeBenchmark.java index 10ec42bf8d4..b46ac18f2c1 100644 --- a/benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/ExpressionScriptSerdeBenchmark.java +++ b/benchmarks/src/jmh/java/org/opensearch/sql/expression/operator/predicate/ExpressionScriptSerdeBenchmark.java @@ -33,6 +33,7 @@
import org.opensearch.sql.expression.function.PPLFuncImpTable;
import org.opensearch.sql.opensearch.storage.serde.DefaultExpressionSerializer;
import org.opensearch.sql.opensearch.storage.serde.RelJsonSerializer;
+import org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper;
@Warmup(iterations = 1) @Measurement(iterations = 10) @@ -74,7 +75,9 @@ public void testRexNodeJsonSerde() {
SqlStdOperatorTable.NOT_EQUALS, rexUpper, rexBuilder.makeLiteral("ABOUT"));
Map fieldTypes = Map.of("Referer", ExprCoreType.STRING);
- String serializedStr = relJsonSerializer.serialize(rexNotEquals, rowType, fieldTypes);
+ String serializedStr =
+ relJsonSerializer.serialize(
+ rexNotEquals, new ScriptParameterHelper(rowType.getFieldList(), fieldTypes));
relJsonSerializer.deserialize(serializedStr);
} } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 633472d958f..c1c572225fe 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -68,9 +68,6 @@ public interface PlanUtils {
String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__";
String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_";
- String DIRECTION = "DIRECTION";
- String NULL_DIRECTION = "NULL_DIRECTION";
-
static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { case MICROSECOND -> SpanUnit.MICROSECOND; diff --git a/docs/dev/intro-scripts.md b/docs/dev/intro-scripts.md new file mode 100644 index 00000000000..e4c3f9ad9a8 --- /dev/null +++ b/docs/dev/intro-scripts.md @@ -0,0 +1,75 @@
+# Scripts
+
+## Table of contents
+- [V2 Scripts](#v2-scripts)
+- [V3 Scripts](#v3-scripts)
+
+Script queries are generated when we push down operators (e.g. `FILTER`, `AGGREGATE`, `SORT`) that contain functions not supported by the OpenSearch DSL.
+
+Script push down is supported on both the v2 and v3 engines, although they register different script languages and use different script formats.
+
+## V2 Scripts
+
+A v2 script is a v2 expression tree encoded as a base64 string. It is a pure byte string without any parameters.
+
+Example:
+
+Any query generates a script query like the one below; the `V2_SCRIPT_ENCODED_BYTE_STRING` varies per query and is not human-readable.
+
+```json
+{
+ "script": {
+ "source": "{\"langType\":\"v2\", \"script\":\"<V2_SCRIPT_ENCODED_BYTE_STRING>\"}",
+ "lang": "opensearch_compounded_script"
+ }
+}
+```
+
+## V3 Scripts
+
+A v3 script is a Calcite expression tree encoded as a base64 string. Before encoding, however, the tree is standardized to make it reusable and thus achieve a higher cache hit ratio. To that end, specific information (like field names and literal values) is extracted from the expression tree and put in the params field.
+
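+To make this concrete, the following is a minimal, self-contained sketch of how an executor could resolve the standardized parameters from the `SOURCES` and `DIGESTS` params described under Example 1 below. It is an illustration only, not the engine's actual implementation: the class name and the `Source` enum are hypothetical, mirroring the documented 0/1/2 convention of CalciteScriptEngine::Source, and doc values and `_source` are faked with plain maps.
+
+```java
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+public class ScriptParamResolutionSketch {
+
+  // Mirrors the documented 0/1/2 codes of CalciteScriptEngine::Source.
+  enum Source { DOC_VALUE, SOURCE, LITERAL }
+
+  // Resolves the value of each standardized parameter ?i from its source.
+  static Object[] resolveParams(
+      List<Integer> sources,
+      List<Object> digests,
+      Map<String, Object> docValues,
+      Map<String, Object> sourceFields) {
+    Object[] resolved = new Object[sources.size()];
+    for (int i = 0; i < sources.size(); i++) {
+      Object digest = digests.get(i);
+      resolved[i] =
+          switch (Source.values()[sources.get(i)]) {
+            case DOC_VALUE -> docValues.get((String) digest); // digest is a field name
+            case SOURCE -> sourceFields.get((String) digest); // digest is a field name
+            case LITERAL -> digest; // digest is the literal value itself
+          };
+    }
+    return resolved;
+  }
+
+  public static void main(String[] args) {
+    // The params of Example 1 below: case(age < 35, 'u35' else email).
+    Object[] params =
+        resolveParams(
+            List.of(0, 2, 2, 1),
+            List.of("age", 35, "u35", "email"),
+            Map.of("age", 28), // fake doc values
+            Map.of("email", "a@b.com")); // fake _source
+    // Prints [28, 35, u35, a@b.com]; the standardized expression tree is then
+    // evaluated against these resolved values.
+    System.out.println(Arrays.toString(params));
+  }
+}
+```
+
+Example 1:
+
+The expression tree of `case(age < 35, 'u35' else email)` generates a script query like the one below. 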
+Before encoding, the tree is standardized to `case(?0 < ?1, ?2 else ?3)`, where `?i` denotes the i-th parameter.
+
+```json
+{
+ "script": {
+ "source": "{\"langType\":\"calcite\", \"script\":\"\"}",
+ "lang": "opensearch_compounded_script",
+ "params": {
+ "utcTimestamp": 17630261838681530000,
+ "SOURCES": [0, 2, 2, 1],
+ "DIGESTS": ["age", 35, "u35", "email"]
+ }
+ }
+}
+```
+There are usually three parts in the params:
+
+1. `utcTimestamp`: The timestamp at which the query is executed; it is used by some time-related functions.
+2. `SOURCES`: The source of each parameter, indicating where to retrieve the value of the i-th parameter. See the class CalciteScriptEngine::Source: 0 stands for `DOC_VALUE`, 1 stands for `SOURCE`, and 2 stands for `LITERAL`.
+3. `DIGESTS`: The digest of each parameter, used as the key to retrieve the value from the corresponding source. It is the field name for `DOC_VALUE` and `SOURCE`, and the literal value itself for `LITERAL`.
+
+
+Example 2:
+
+A sort script adds one more parameter, `MISSING_MAX`, which decides whether the MIN or MAX value is used in place of a missing value (i.e. NULL). Its value is derived from the combination of sort direction and null direction; see SortExprDigest::isMissingMax for details.
+
+The sort command `sort (age + balance)` generates a script query like the one below.
+
+```json
+{
+ "script": {
+ "source": "{\"langType\":\"calcite\", \"script\":\"\"}",
+ "lang": "opensearch_compounded_script",
+ "params": {
+ "MISSING_MAX": false,
+ "utcTimestamp": 17630261838681530000,
+ "SOURCES": [0, 0],
+ "DIGESTS": ["age", "balance"]
+ }
+ }
+}
+```
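+The mapping from (direction, null direction) to `MISSING_MAX` can be sketched as follows. This is an illustration of the assumed semantics only (the class and enums below are hypothetical); SortExprDigest::isMissingMax is the source of truth. A missing value is replaced by the maximum sort key exactly when it must sort after all real values.
+
+```java
+public class MissingMaxSketch {
+
+  enum Direction { ASCENDING, DESCENDING }
+
+  enum NullDirection { FIRST, LAST }
+
+  // ASC + NULLS LAST and DESC + NULLS FIRST both place nulls at the "max" end.
+  static boolean isMissingMax(Direction direction, NullDirection nullDirection) {
+    boolean isAscending = direction == Direction.ASCENDING;
+    boolean isNullFirst = nullDirection == NullDirection.FIRST;
+    return isAscending != isNullFirst;
+  }
+
+  public static void main(String[] args) {
+    // Prints false: ascending with nulls first maps a missing value to the
+    // minimum, matching the MISSING_MAX=false example above.
+    System.out.println(isMissingMax(Direction.ASCENDING, NullDirection.FIRST));
+  }
+}
+```
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 46a86686cb6..c981dfee8cb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -688,9 +688,9 @@ public void testExplainRegexMatchInWhereWithScriptPushdown() throws IOException
enabledOnlyWhenPushdownIsEnabled();
String query =
String.format("source=%s | where regexp_match(name, 'hello')", TEST_INDEX_STRINGS);
- var result = explainQueryToString(query);
- String expected = loadFromFile("expectedOutput/calcite/explain_regexp_match_in_where.json");
- assertJsonEqualsIgnoreId(expected, result);
+ var result = explainQueryYaml(query);
+ String expected = loadFromFile("expectedOutput/calcite/explain_regexp_match_in_where.yaml");
+ assertYamlEqualsIgnoreId(expected, result);
}
@Test @@ -1417,9 +1417,9 @@ public void testStrftimeFunctionExplain() throws IOException {
// Script generation is not stable in v2
@Test
public void testExplainPushDownScriptsContainingUDT() throws IOException {
- assertJsonEqualsIgnoreId(
- loadExpectedPlan("explain_filter_script_ip_push.json"),
- explainQueryToString(
+ assertYamlEqualsIgnoreId(
+ loadExpectedPlan("explain_filter_script_ip_push.yaml"),
+ explainQueryYaml(
String.format(
"source=%s | where cidrmatch(host, '0.0.0.0/24') | fields host", TEST_INDEX_WEBLOGS))); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml index d04bbd2df44..c9929d83e81 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml @@ 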
-6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAe3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0BXZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAidTMwIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJTRUFSQ0giLAogICAgICAgICJraW5kIjogIlNFQVJDSCIsCiAgICAgICAgInN5bnRheCI6ICJJTlRFUk5BTCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgImNsb3NlZCIsCiAgICAgICAgICAgICAgICAiMzAiLAogICAgICAgICAgICAgICAgIjQwIgogICAgICAgICAgICAgIF0KICAgICAgICAgICAgXSwKICAgICAgICAgICAgIm51bGxBcyI6ICJVTktOT1dOIgogICAgICAgICAgfSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogInU0MCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MTAwIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AANhZ2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdJTlRFR0VSeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), 
PROJECT->[avg_age, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGFHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNFQVJDSCIsCiAgICAgICAgImtpbmQiOiAiU0VBUkNIIiwKICAgICAgICAic3ludGF4IjogIklOVEVSTkFMIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgImNsb3NlZCIsCiAgICAgICAgICAgICAgICAiMzAiLAogICAgICAgICAgICAgICAgIjQwIgogICAgICAgICAgICAgIF0KICAgICAgICAgICAgXSwKICAgICAgICAgICAgIm51bGxBcyI6ICJVTktOT1dOIgogICAgICAgICAgfSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,0,2,2],"DIGESTS":["age",30,"u30","age","u40","u100"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml index 82cbadeb735..20199016fb4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA5nsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQCe3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMSwKICAgICAgIm5hbWUiOiAiJDEiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAACdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHSU5URUdFUnQABWVtYWlsc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+ABB4cH5xAH4ACnQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgALdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AG3hwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AHQAAAABzcQB+AAAAAAADdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDJXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,1],"DIGESTS":["age",35,"u35","email"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml index 9502c66a448..f50f5e44582 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAe3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Ap17CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAzMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAxMDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AANhZ2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdJTlRFR0VSeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC9HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,2,2,2],"DIGESTS":["age",30,30,100]}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml index 88a57952c89..e0c99960471 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], m=[EXTRACT('minute':VARCHAR, $17)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[UserID, m, SearchPhrase, count()], SORT_AGG_METRICS->[3 DESC LAST], PROJECT->[count(), UserID, m, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID|m":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiRXZlbnRUaW1lIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Ae57CiAgIm9wIjogewogICAgIm5hbWUiOiAiRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAibGl0ZXJhbCI6ICJtaW51dGUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACUV2ZW50VGltZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzdAAPTGphdmEvdXRpbC9NYXA7eHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAlUSU1FU1RBTVB+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgASdAAERGF0ZXNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGXhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGwAAAABzcgATamF2YS51dGlsLkFycmF5TGlzdHiB0h2Zx2GdAwABSQAEc2l6ZXhwAAAAA3cEAAAAA3QAE3l5eXktTU0tZGQgSEg6bW06c3N0ABlzdHJpY3RfZGF0ZV9vcHRpb25hbF90aW1ldAAMZXBvY2hfbWlsbGlzeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"value_type":"long"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[UserID, m, SearchPhrase, count()], SORT_AGG_METRICS->[3 DESC LAST], PROJECT->[count(), UserID, m, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID|m":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml index 838a201cf92..daa53e1c368 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml @@ -14,4 +14,4 @@ calcite: EnumerableLimit(fetch=[25]) EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[100000], expr#4=[>($t1, $t3)], proj#0..2=[{exprs}], $condition=[$t4]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, CounterID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($1),c=COUNT()), PROJECT->[l, c, CounterID]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"CounterID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"CounterID":{"terms":{"field":"CounterID","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAknsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJVUkwiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQApnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiAwLAogICAgICAibmFtZSI6ICIkMCIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AANVUkx+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAZTVFJJTkd4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, CounterID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($1),c=COUNT()), PROJECT->[l, c, CounterID]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"CounterID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"CounterID":{"terms":{"field":"CounterID","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["URL"]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml index 9a7638f1811..4ec2ad5e6c8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml @@ -15,4 +15,4 @@ calcite: EnumerableLimit(fetch=[25]) EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[100000], expr#5=[>($t1, $t4)], 
proj#0..3=[{exprs}], $condition=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[Referer], SCRIPT->AND(<>($0, ''), IS NOT NULL(REGEXP_REPLACE($0, '^https?://(?:www\.)?([^/]+)/.*$':VARCHAR, '$1'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($2),c=COUNT(),min(Referer)=MIN($1)), PROJECT->[l, c, min(Referer), k]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"Referer","boost":1.0}}],"must_not":[{"term":{"Referer":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJSZWZlcmVyIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AsN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiSVMgTk9UIE5VTEwiLAogICAgImtpbmQiOiAiSVNfTk9UX05VTEwiLAogICAgInN5bnRheCI6ICJQT1NURklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiXmh0dHBzPzovLyg/Ond3d1xcLik/KFteL10rKS8uKiQiLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6ICIkMSIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMgogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQAB1JlZmVyZXJ+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAZTVFJJTkd4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["Referer"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"k":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJSZWZlcmVyIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Adp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJeaHR0cHM/Oi8vKD86d3d3XFwuKT8oW14vXSspLy4qJCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICIkMSIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAyCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdSZWZlcmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAGU1RSSU5HeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJSZWZlcmVyIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AKZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAHUmVmZXJlcn5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABlNUUklOR3h4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}},"min(Referer)":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"Referer"}],"sort":[{"Referer":{"order":"asc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[Referer], SCRIPT->AND(<>($0, ''), IS NOT NULL(REGEXP_REPLACE($0, '^https?://(?:www\.)?([^/]+)/.*$':VARCHAR, '$1'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($2),c=COUNT(),min(Referer)=MIN($1)), PROJECT->[l, c, min(Referer), k]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"Referer","boost":1.0}}],"must_not":[{"term":{"Referer":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDEnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJUyBOT1QgTlVMTCIsCiAgICAia2luZCI6ICJJU19OT1RfTlVMTCIsCiAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMgogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["Referer","^https?://(?:www\\.)?([^/]+)/.*$","$1"]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["Referer"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"k":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDIKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["Referer","^https?://(?:www\\.)?([^/]+)/.*$","$1"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["Referer"]}}}},"min(Referer)":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"Referer"}],"sort":[{"Referer":{"order":"asc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml index 3c74a22c40f..00d724db076 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml @@ -13,4 +13,4 @@ calcite: EnumerableCalc(expr#0..5=[{inputs}], PageViews=[$t5], TraficSourceID=[$t0], SearchEngineID=[$t2], AdvEngineID=[$t1], Src=[$t3], Dst=[$t4]) EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBT3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIlNlYXJjaEVuZ2luZUlEIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiU01BTExJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJBZHZFbmdpbmVJRCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIlJlZmVyZXIiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQE8XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICAgICAgfSwKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogMCwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJpbnB1dCI6IDEsCiAgICAgICAgICAgICAgIm5hbWUiOiAiJDEiCiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICA
gICAgICAgICAgICAibGl0ZXJhbCI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAyLAogICAgICAibmFtZSI6ICIkMiIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIiIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAADdAAOU2VhcmNoRW5naW5lSUR+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAVTSE9SVHQAB1JlZmVyZXJ+cQB+AAp0AAZTVFJJTkd0AAtBZHZFbmdpbmVJRHEAfgAMeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGCnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAib3AiOiB7CiAgICAgICAgICAgICJuYW1lIjogIj0iLAogICAgICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgI
CAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgICAgIH0sCiAgICAgICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlNNQUxMSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml index 019cd05cdac..b2a68c85d2f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml @@ -13,4 +13,4 @@ calcite: EnumerableCalc(expr#0..1=[{inputs}], PageViews=[$t1], M=[$t0]) EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiRXZlbnRUaW1lIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AhN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiREFURV9GT1JNQVQiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICIlWS0lbS0lZCAlSDowMDowMCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogLTEKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAlFdmVudFRpbWVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3QAD0xqYXZhL3V0aWwvTWFwO3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAJVElNRVNUQU1QfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEnQABERhdGVzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABl4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABsAAAAAc3IAE2phdmEudXRpbC5BcnJheUxpc3R4gdIdmcdhnQMAAUkABHNpemV4cAAAAAN3BAAAAAN0ABN5eXl5LU1NLWRkIEhIOm1tOnNzdAAZc3RyaWN0X2RhdGVfb3B0aW9uYWxfdGltZXQADGVwb2NoX21pbGxpc3h4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), 
AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCgHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJEQVRFX0ZPUk1BVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogLTEKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["EventTime","%Y-%m-%d %H:00:00"]}},"missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml index 9d452158542..b0b23b7ce10 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(gender=[$4], b_1=[+($3, 1)], $f3=[POWER($3, 2)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(b_1)=COUNT($1),c3=COUNT($2)), PROJECT->[count(b_1), c3, gender], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(b_1)":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAfnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AQF7CiAgIm9wIjogewogICAgIm5hbWUiOiAiKyIsCiAgICAia2luZCI6ICJQTFVTIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiAwLAogICAgICAibmFtZSI6ICIkMCIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQAB2JhbGFuY2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}},"c3":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAfnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0ARF7CiAgIm9wIjogewogICAgIm5hbWUiOiAiUE9XRVIiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(b_1)=COUNT($1),c3=COUNT($2)), PROJECT->[count(b_1), c3, gender], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(b_1)":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",1]}}}},"c3":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBVXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJQT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",2]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml index bfaf02ad47c..03ae462f020 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml @@ -7,4 +7,4 @@ calcite: LogicalProject(t=[UNIX_TIMESTAMP($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), t], SORT->[1 ASC FIRST], LIMIT->3, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":3,"sources":[{"t":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AWd7CiAgIm9wIjogewogICAgIm5hbWUiOiAiVU5JWF9USU1FU1RBTVAiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJET1VCTEUiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzdAAPTGphdmEvdXRpbC9NYXA7eHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAlUSU1FU1RBTVB+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgASdAAERGF0ZXNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGXhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGwAAAABzcQB+AAAAAAABdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"value_type":"double","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), t], SORT->[1 ASC FIRST], LIMIT->3, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":3,"sources":[{"t":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQB4XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJVTklYX1RJTUVTVEFNUCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiRE9VQkxFIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["birthdate"]}},"missing_bucket":true,"value_type":"double","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml index 0c34a565cdc..ae68535cb0b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], t=[DATE_ADD($3, 1:INTERVAL DAY)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SCRIPT->IS NOT NULL(DATE_ADD($3, 1:INTERVAL DAY)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AyV7CiAgIm9wIjogewogICAgIm5hbWUiOiAiSVMgTk9UIE5VTEwiLAogICAgImtpbmQiOiAiSVNfTk9UX05VTEwiLAogICAgInN5bnRheCI6ICJQT1NURklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiREFURV9BREQiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXN0AA9MamF2YS91dGlsL01hcDt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQACVRJTUVTVEFNUH5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABJ0AAREYXRlc3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAZeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAbAAAAAHNxAH4AAAAAAAF3BAAAAAB4eHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}},"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAt3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0BQN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiU1BBTiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiREFURV9BREQiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogImQiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICJwcmVjaXNpb24iOiAtMQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzdAAPTGphdmEvdXRpbC9NYXA7eHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAlUSU1FU1RBTVB+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgASdAAERGF0ZXNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGXhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGwAAAABzcQB+AAAAAAABdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, 
startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SCRIPT->IS NOT NULL(DATE_ADD($3, 1:INTERVAL DAY)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDs3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJJUyBOT1QgTlVMTCIsCiAgICAia2luZCI6ICJJU19OT1RfTlVMTCIsCiAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJEQVRFX0FERCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["birthdate"]}},"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFmXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTUEFOIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJEQVRFX0FERCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAxCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAibnVsbGFibGUiOiB0cnVlLAogICAgInByZWNpc2lvbiI6IC0xCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["birthdate",1,"d"]}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml index 1a3df86b66a..0bc8e26bdd2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},sum(balance)=SUM($0),count()=COUNT(),d=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydACjewogICJvcCI6IHsKICAgICJuYW1lIjogIkxPV0VSIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABXN0YXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},sum(balance)=SUM($0),count()=COUNT(),d=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index b020c1003b1..ddbe8b2764e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), new_gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJnZW5kZXIiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQAo3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiAwLAogICAgICAibmFtZSI6ICIkMCIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAZnZW5kZXJzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AC3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABF0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAYeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAaAAAAAHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAMfnEAfgAQdAAGU1RSSU5HfnEAfgAUdAAHS2V5d29yZHEAfgAZeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydACjewogICJvcCI6IHsKICAgICJuYW1lIjogIkxPV0VSIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABXN0YXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), new_gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index 52852044d92..9292b81e5be 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJzdGF0ZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydACjewogICJvcCI6IHsKICAgICJuYW1lIjogIkxPV0VSIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABXN0YXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml index fcbc002565c..0726597ef90 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[CHAR_LENGTH($t0)], sum=[$t1], len=[$t2], gender=[$t0]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum=SUM($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAfnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AQN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiKyIsCiAgICAia2luZCI6ICJQTFVTIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiAwLAogICAgICAibmFtZSI6ICIkMCIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogMTAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAHYmFsYW5jZX5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABExPTkd4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum=SUM($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",100]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.json deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml index e87eb6d895c..8fd23589e33 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..3=[{inputs}], expr#4=[100], expr#5=[*($t2, $t4)], expr#6=[+($t1, $t5)], expr#7=[-($t1, $t5)], expr#8=[*($t1, $t4)], sum(balance)=[$t1], sum(balance + 100)=[$t6], sum(balance - 100)=[$t7], sum(balance * 100)=[$t8], sum(balance / 100)=[$t3], gender=[$t0]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum(balance)=SUM($1),sum(balance + 100)_COUNT=COUNT($1),sum(balance / 100)=SUM($2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"sum(balance + 100)_COUNT":{"value_count":{"field":"balance"}},"sum(balance / 
100)":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAfnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Ac97CiAgIm9wIjogewogICAgIm5hbWUiOiAiRElWSURFIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAxMDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAHYmFsYW5jZX5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABExPTkd4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum(balance)=SUM($1),sum(balance + 100)_COUNT=COUNT($1),sum(balance / 100)=SUM($2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"sum(balance + 100)_COUNT":{"value_count":{"field":"balance"}},"sum(balance / 100)":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCEXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJESVZJREUiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",100]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml index 6461c40061c..482e7d299e7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_no_expr_output_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(account_number=[$0], 
firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], age2=[+($10, $7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST], LIMIT->10000, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST], LIMIT->10000, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRHsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml index 256c393b069..77ebbda2729 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_project_then_sort.yaml @@ -6,4 +6,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | 
EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], $f1=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRHsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml index b509fb3117f..8762b609c0b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], age=[$t0], $f1=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRHsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml index db832db7b4d..b71697ce1dd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_single_expr_output_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[+($t0, $t1)], $f0=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, balance], SORT_EXPR->[+($0, $1) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["age","balance"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRHsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml index b57328afbae..91d013e1536 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_nested_expr.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[-($t13, $t10)], proj#0..14=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[-(+($10, $7), $10) ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydAHbewogICJvcCI6IHsKICAgICJuYW1lIjogIi0iLAogICAgImtpbmQiOiAiTUlOVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICIrIiwKICAgICAgICAia2luZCI6ICJQTFVTIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDEsCiAgICAgICAgICAibmFtZSI6ICIkMSIKICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfQogIF0sCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAnQAB2JhbGFuY2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HdAADYWdlfnEAfgAKdAAHSU5URUdFUnh4\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[-(+($10, $7), $10) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCsnsKICAib3AiOiB7CiAgICAibmFtZSI6ICItIiwKICAgICJraW5kIjogIk1JTlVTIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiKyIsCiAgICAgICAgImtpbmQiOiAiUExVUyIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0sCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0KfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 
0,"SOURCES":[0,0,0],"DIGESTS":["age","balance","age"]}},"type":"number","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push7.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push7.yaml index c54440bbf61..07a4d5e2268 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push7.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_count_agg_push7.yaml @@ -5,4 +5,4 @@ calcite: LogicalProject($f1=[+($3, 1)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT($0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"cnt":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAfnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AQF7CiAgIm9wIjogewogICAgIm5hbWUiOiAiKyIsCiAgICAia2luZCI6ICJQTFVTIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiAwLAogICAgICAibmFtZSI6ICIkMCIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQAB2JhbGFuY2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT($0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"cnt":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",1]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml index 71e2911eb09..d28a4ef30ec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eval_min.yaml @@ -5,4 +5,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) 
   physical: |
     EnumerableCalc(expr#0..10=[{inputs}], expr#11=[1], expr#12=[2], expr#13=[3], expr#14=['banana':VARCHAR], expr#15=[SCALAR_MIN($t11, $t12, $t13, $t8, $t14)], proj#0..10=[{exprs}], $f11=[$t15])
-      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
\ No newline at end of file
+      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml
index 7d61dbc0b8b..8c4b080c8f2 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_function_script_push.yaml
@@ -5,4 +5,4 @@ calcite:
         LogicalFilter(condition=[AND(=(CHAR_LENGTH($1), 5), =(ABS($8), 32), =($3, 39225))])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, balance, age], SCRIPT->AND(=(CHAR_LENGTH($0), 5), =(ABS($2), 32), =($1, 39225)), PROJECT->[firstname, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAmHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQBrXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogNSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgALeHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEXQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABh4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABoAAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+AAx+cQB+ABB0AAZTVFJJTkd+cQB+ABR0AAdLZXl3b3JkcQB+ABl4eHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAensKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQBqXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkFCUyIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDMyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},{"term":{"balance":{"value":39225,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, balance, age], SCRIPT->AND(=(CHAR_LENGTH($0), 5), =(ABS($2), 32), =($1, 39225)), PROJECT->[firstname, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCG3sKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["firstname.keyword",5]}},"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQB+HsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkFCUyIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["age",32]}},"boost":1.0}},{"term":{"balance":{"value":39225,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.json
deleted file mode 100644
index 282e7ed4c96..00000000000
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "calcite": {
-    "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(host=[$0])\n LogicalFilter(condition=[CIDRMATCH($0, '0.0.0.0/24':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]])\n",
-    "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], SCRIPT->CIDRMATCH($0, '0.0.0.0/24':VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAknsKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfSVAiLAogICAgICAidHlwZSI6ICJPVEhFUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImhvc3QiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQB9XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSURSTUFUQ0giLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICIwLjAuMC4wLzI0IiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJPT0xFQU4iLAogICAgIm51bGxhYmxlIjogdHJ1ZQogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABGhvc3R+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAJJUHh4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":1758184003318649000}},\"boost\":1.0}},\"_source\":{\"includes\":[\"host\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
-  }
-}
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml
new file mode 100644
index 00000000000..6127126e755
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_ip_push.yaml
@@ -0,0 +1,8 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalProject(host=[$0])
+        LogicalFilter(condition=[CIDRMATCH($0, '0.0.0.0/24':VARCHAR)])
+          CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]])
+  physical: |
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[PROJECT->[host], SCRIPT->CIDRMATCH($0, '0.0.0.0/24':VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCR3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSURSTUFUQ0giLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9JUCIsCiAgICAgICAgInR5cGUiOiAiT1RIRVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["host","0.0.0.0/24"]}},"boost":1.0}},"_source":{"includes":["host"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml
index 4992957b230..26be3b4c787 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_script_push.yaml
@@ -5,4 +5,4 @@ calcite:
         LogicalFilter(condition=[AND(=($1, 'Amber'), =(-($8, 2), 30))])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, age], SCRIPT->AND(=($0, 'Amber'), =(-($1, 2), 30)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"term":{"firstname.keyword":{"value":"Amber","boost":1.0}}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAensKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQCcnsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIi0iLAogICAgICAgICJraW5kIjogIk1JTlVTIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, age], SCRIPT->AND(=($0, 'Amber'), =(-($1, 2), 30)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"term":{"firstname.keyword":{"value":"Amber","boost":1.0}}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCxnsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIi0iLAogICAgICAgICJraW5kIjogIk1JTlVTIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["age",2,30]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml
index aa739610250..2877a74bf7b 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml
@@ -5,4 +5,4 @@ calcite:
         LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1)))])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1))), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAmHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQENHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiVFJJTSIsCiAgICAgICAgICAgICJraW5kIjogIlRSSU0iLAogICAgICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogIkJPVEgiLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiU1lNQk9MIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImxpdGVyYWwiOiAiICIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IDEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfQogICAgICBdCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAJZmlyc3RuYW1lc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1))), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiVFJJTSIsCiAgICAgICAgICAgICJraW5kIjogIlRSSU0iLAogICAgICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogIkJPVEgiLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiU1lNQk9MIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IDEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQogICAgICAgICAgXQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0],"DIGESTS":["firstname.keyword"," ","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml
index 349be65b454..df97332ae98 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml
@@ -5,4 +5,4 @@ calcite:
         LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY($1))])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAmHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQCBXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAlmaXJzdG5hbWVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AC3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABF0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAYeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAaAAAAAHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAMfnEAfgAQdAAGU1RSSU5HfnEAfgAUdAAHS2V5d29yZHEAfgAZeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC13sKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["firstname.keyword","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml
index b940a11198e..b3d22c2bfa1 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml
@@ -5,4 +5,4 @@ calcite:
         LogicalFilter(condition=[OR(=($4, 'M'), IS NULL($1), IS EMPTY($1))])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =($4, 'M'), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBBHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJnZW5kZXIiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQDdXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMSwKICAgICAgICAgICJuYW1lIjogIiQxIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiTSIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIEVNUFRZIiwKICAgICAgICAia2luZCI6ICJPVEhFUiIsCiAgICAgICAgInN5bnRheCI6ICJQT1NURklYIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAnQACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgALeHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEXQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABh4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABoAAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+AAx+cQB+ABB0AAZTVFJJTkd+cQB+ABR0AAdLZXl3b3JkcQB+ABl4dAAGZ2VuZGVyc3EAfgAKcQB+ABJxAH4AFXEAfgAZc3EAfgAAAAAAA3cEAAAAAnEAfgAecQB+AB94eHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =($4, 'M'), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQEs3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIEVNUFRZIiwKICAgICAgICAia2luZCI6ICJPVEhFUiIsCiAgICAgICAgInN5bnRheCI6ICJQT1NURklYIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,0],"DIGESTS":["firstname.keyword","gender.keyword","M","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_min_max_agg_on_derived_field.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_min_max_agg_on_derived_field.yaml
index 805c42527a8..f083def95a4 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_min_max_agg_on_derived_field.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_min_max_agg_on_derived_field.yaml
@@ -5,4 +5,4 @@ calcite:
         LogicalProject(balance2=[CEIL(DIVIDE($3, 10000.0:DECIMAL(6, 1)))])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},MIN(balance2)=MIN($0),MAX(balance2)=MAX($0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"MIN(balance2)":{"min":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAfnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Awl7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0VJTCIsCiAgICAia2luZCI6ICJDRUlMIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJESVZJREUiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxMDAwMC4wLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIkRFQ0lNQUwiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IDYsCiAgICAgICAgICAgICJzY2FsZSI6IDEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiREVDSU1BTCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogMjcsCiAgICAgICAgInNjYWxlIjogNwogICAgICB9LAogICAgICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgICAgICJkeW5hbWljIjogZmFsc2UKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}},"MAX(balance2)":{"max":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAfnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Awl7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0VJTCIsCiAgICAia2luZCI6ICJDRUlMIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJESVZJREUiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxMDAwMC4wLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIkRFQ0lNQUwiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IDYsCiAgICAgICAgICAgICJzY2FsZSI6IDEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiREVDSU1BTCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogMjcsCiAgICAgICAgInNjYWxlIjogNwogICAgICB9LAogICAgICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgICAgICJkeW5hbWljIjogZmFsc2UKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},MIN(balance2)=MIN($0),MAX(balance2)=MAX($0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"MIN(balance2)":{"min":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDVHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDRUlMIiwKICAgICJraW5kIjogIkNFSUwiLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkRJVklERSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDEwMDAwLjAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiREVDSU1BTCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogNiwKICAgICAgICAgICAgInNjYWxlIjogMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJERUNJTUFMIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAyNywKICAgICAgICAic2NhbGUiOiA3CiAgICAgIH0sCiAgICAgICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAgICAgImR5bmFtaWMiOiBmYWxzZQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["balance"]}}}},"MAX(balance2)":{"max":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDVHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDRUlMIiwKICAgICJraW5kIjogIkNFSUwiLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkRJVklERSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDEwMDAwLjAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiREVDSU1BTCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogNiwKICAgICAgICAgICAgInNjYWxlIjogMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJERUNJTUFMIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAyNywKICAgICAgICAic2NhbGUiOiA3CiAgICAgIH0sCiAgICAgICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAgICAgImR5bmFtaWMiOiBmYWxzZQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["balance"]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml
index 16c87b3ae02..dfcb6e6b7e0 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml
@@ -7,4 +7,4 @@ calcite:
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
     EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], patterns_field=[$t6], pattern_count=[$t1], tokens=[$t9], sample_logs=[$t2])
-      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydAYZewogICJvcCI6IHsKICAgICJuYW1lIjogIkNBU0UiLAogICAgImtpbmQiOiAiQ0FTRSIsCiAgICAic3ludGF4IjogIlNQRUNJQUwiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJTRUFSQ0giLAogICAgICAgICJraW5kIjogIlNFQVJDSCIsCiAgICAgICAgInN5bnRheCI6ICJJTlRFUk5BTCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgInNpbmdsZXRvbiIsCiAgICAgICAgICAgICAgICAie1widmFsdWVcIjpcIlwiLFwiY2hhcnNldE5hbWVcIjpcIklTTy04ODU5LTFcIixcImNvbGxhdGlvblwiOntcImNvbGxhdGlvbk5hbWVcIjpcIklTTy04ODU5LTEkZW5fVVMkcHJpbWFyeVwiLFwiY29lcmNpYmlsaXR5XCI6XCJJTVBMSUNJVFwiLFwibG9jYWxlXCI6XCJlbl9VU1wifSxcImNoYXJzZXRcIjpcIklTTy04ODU5LTFcIixcInZhbHVlQnl0ZXNcIjpudWxsfSIKICAgICAgICAgICAgICBdCiAgICAgICAgICAgIF0sCiAgICAgICAgICAgICJudWxsQXMiOiAiVFJVRSIKICAgICAgICAgIH0sCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIiIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiW2EtekEtWjAtOV0rIiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiPCo+IiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABWVtYWlsc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlHsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydAYZewogICJvcCI6IHsKICAgICJuYW1lIjogIkNBU0UiLAogICAgImtpbmQiOiAiQ0FTRSIsCiAgICAic3ludGF4IjogIlNQRUNJQUwiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJTRUFSQ0giLAogICAgICAgICJraW5kIjogIlNFQVJDSCIsCiAgICAgICAgInN5bnRheCI6ICJJTlRFUk5BTCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgInNpbmdsZXRvbiIsCiAgICAgICAgICAgICAgICAie1widmFsdWVcIjpcIlwiLFwiY2hhcnNldE5hbWVcIjpcIklTTy04ODU5LTFcIixcImNvbGxhdGlvblwiOntcImNvbGxhdGlvbk5hbWVcIjpcIklTTy04ODU5LTEkZW5fVVMkcHJpbWFyeVwiLFwiY29lcmNpYmlsaXR5XCI6XCJJTVBMSUNJVFwiLFwibG9jYWxlXCI6XCJlbl9VU1wifSxcImNoYXJzZXRcIjpcIklTTy04ODU5LTFcIixcInZhbHVlQnl0ZXNcIjpudWxsfSIKICAgICAgICAgICAgICBdCiAgICAgICAgICAgIF0sCiAgICAgICAgICAgICJudWxsQXMiOiAiVFJVRSIKICAgICAgICAgIH0sCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIiIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiW2EtekEtWjAtOV0rIiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiPCo+IiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgIAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABWVtYWlsc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"email.keyword"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQG6HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiU0VBUkNIIiwKICAgICAgICAia2luZCI6ICJTRUFSQ0giLAogICAgICAgICJzeW50YXgiOiAiSU5URVJOQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJzaW5nbGV0b24iLAogICAgICAgICAgICAgICAgIntcInZhbHVlXCI6XCJcIixcImNoYXJzZXROYW1lXCI6XCJJU08tODg1OS0xXCIsXCJjb2xsYXRpb25cIjp7XCJjb2xsYXRpb25OYW1lXCI6XCJJU08tODg1OS0xJGVuX1VTJHByaW1hcnlcIixcImNvZXJjaWJpbGl0eVwiOlwiSU1QTElDSVRcIixcImxvY2FsZVwiOlwiZW5fVVNcIn0sXCJjaGFyc2V0XCI6XCJJU08tODg1OS0xXCIsXCJ2YWx1ZUJ5dGVzXCI6bnVsbH0iCiAgICAgICAgICAgICAgXQogICAgICAgICAgICBdLAogICAgICAgICAgICAibnVsbEFzIjogIlRSVUUiCiAgICAgICAgICB9LAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,2],"DIGESTS":["email.keyword","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQG6HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiU0VBUkNIIiwKICAgICAgICAia2luZCI6ICJTRUFSQ0giLAogICAgICAgICJzeW50YXgiOiAiSU5URVJOQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJzaW5nbGV0b24iLAogICAgICAgICAgICAgICAgIntcInZhbHVlXCI6XCJcIixcImNoYXJzZXROYW1lXCI6XCJJU08tODg1OS0xXCIsXCJjb2xsYXRpb25cIjp7XCJjb2xsYXRpb25OYW1lXCI6XCJJU08tODg1OS0xJGVuX1VTJHByaW1hcnlcIixcImNvZXJjaWJpbGl0eVwiOlwiSU1QTElDSVRcIixcImxvY2FsZVwiOlwiZW5fVVNcIn0sXCJjaGFyc2V0XCI6XCJJU08tODg1OS0xXCIsXCJ2YWx1ZUJ5dGVzXCI6bnVsbH0iCiAgICAgICAgICAgICAgXQogICAgICAgICAgICBdLAogICAgICAgICAgICAibnVsbEFzIjogIlRSVUUiCiAgICAgICAgICB9LAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,2],"DIGESTS":["email.keyword","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"email.keyword"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml
index a1c7349ac3f..7ac431a8b1d 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex.yaml
@@ -6,4 +6,4 @@ calcite:
         LogicalFilter(condition=[REGEXP_CONTAINS($10, '^[A-Z][a-z]+$':VARCHAR)])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->REGEXP_CONTAINS($10, '^[A-Z][a-z]+$':VARCHAR), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAl3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJsYXN0bmFtZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydAFCewogICJvcCI6IHsKICAgICJuYW1lIjogIlJFR0VYUF9DT05UQUlOUyIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiAwLAogICAgICAibmFtZSI6ICIkMCIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIl5bQS1aXVthLXpdKyQiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACGxhc3RuYW1lc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->REGEXP_CONTAINS($10, '^[A-Z][a-z]+$':VARCHAR), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBknsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["lastname.keyword","^[A-Z][a-z]+$"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml
index e94f8ab11c3..0dfaa8b267e 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_regex_negated.yaml
@@ -6,4 +6,4 @@ calcite:
         LogicalFilter(condition=[NOT(REGEXP_CONTAINS($10, '.*son$':VARCHAR))])
           CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->NOT(REGEXP_CONTAINS($10, '.*son$':VARCHAR)), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAl3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJsYXN0bmFtZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydAHzewogICJvcCI6IHsKICAgICJuYW1lIjogIk5PVCIsCiAgICAia2luZCI6ICJOT1QiLAogICAgInN5bnRheCI6ICJQUkVGSVgiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMCwKICAgICAgICAgICJuYW1lIjogIiQwIgogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAiLipzb24kIiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQACGxhc3RuYW1lc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4ADH5xAH4AEHQABlNUUklOR35xAH4AFHQAB0tleXdvcmRxAH4AGXh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->NOT(REGEXP_CONTAINS($10, '.*son$':VARCHAR)), LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCWnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJOT1QiLAogICAgImtpbmQiOiAiTk9UIiwKICAgICJzeW50YXgiOiAiUFJFRklYIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX0NPTlRBSU5TIiwKICAgICAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["lastname.keyword",".*son$"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.json
deleted file mode 100644
index ae0ce763383..00000000000
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "calcite": {
-    "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0])\n LogicalFilter(condition=[REGEXP_CONTAINS($0, 'hello':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]])\n",
-    "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]], PushDownContext=[[PROJECT->[name], SCRIPT->REGEXP_CONTAINS($0, 'hello':VARCHAR), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAk3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJuYW1lIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0ATp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiUkVHRVhQX0NPTlRBSU5TIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAiaGVsbG8iLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAAXQABG5hbWVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AC3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABF0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAYeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAaAAAAAHNxAH4AAAAAAAN3BAAAAAB4eHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},\"_source\":{\"includes\":[\"name\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml new file mode 100644 index 00000000000..ace954902a4 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_regexp_match_in_where.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0]) + LogicalFilter(condition=[REGEXP_CONTAINS($0, 'hello':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_strings]], PushDownContext=[[PROJECT->[name], SCRIPT->REGEXP_CONTAINS($0, 'hello':VARCHAR), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBknsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2],"DIGESTS":["name","hello"]}},"boost":1.0}},"_source":{"includes":["name"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml index 87f2c9ab813..a5f43d257ce 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[>(CHAR_LENGTH($2), 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address], SCRIPT->>(CHAR_LENGTH($0), 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), address_length], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AbN7CiAgIm9wIjogewogICAgIm5hbWUiOiAiPiIsCiAgICAia2luZCI6ICJHUkVBVEVSX1RIQU4iLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}},"boost":1.0}},"_source":{"includes":["address"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"address_length":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AKZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0hBUl9MRU5HVEgiLAogICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAAHYWRkcmVzc3NyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgALeHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEXQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABh4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABoAAAAAc3EAfgAAAAAAA3cEAAAAAHh4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address], SCRIPT->>(CHAR_LENGTH($0), 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), address_length], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCIXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI+IiwKICAgICJraW5kIjogIkdSRUFURVJfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[1,2],"DIGESTS":["address",0]}},"boost":1.0}},"_source":{"includes":["address"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"address_length":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1],"DIGESTS":["address"]}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.json index 5844a594d72..0bc349d23dd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_skip_script_encoding.json @@ -1,7 +1,7 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(firstname=[$1], age=[$8], address=[$2])\n LogicalFilter(condition=[AND(=($2, '671 Bristol Street'), =(-($8, 2), 30))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, address, age], SCRIPT->AND(=($1, '671 Bristol Street'), =(-($2, 2), 30)), PROJECT->[firstname, age, address], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"{\\\\n \\\\\\\"op\\\\\\\": {\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"=\\\\\\\",\\\\n \\\\\\\"kind\\\\\\\": \\\\\\\"EQUALS\\\\\\\",\\\\n \\\\\\\"syntax\\\\\\\": \\\\\\\"BINARY\\\\\\\"\\\\n },\\\\n \\\\\\\"operands\\\\\\\": [\\\\n {\\\\n \\\\\\\"input\\\\\\\": 0,\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"$0\\\\\\\"\\\\n },\\\\n {\\\\n \\\\\\\"literal\\\\\\\": \\\\\\\"671 Bristol Street\\\\\\\",\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"VARCHAR\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": false,\\\\n \\\\\\\"precision\\\\\\\": -1\\\\n }\\\\n }\\\\n ]\\\\n}\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"{\\\\n \\\\\\\"op\\\\\\\": {\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"=\\\\\\\",\\\\n \\\\\\\"kind\\\\\\\": \\\\\\\"EQUALS\\\\\\\",\\\\n \\\\\\\"syntax\\\\\\\": \\\\\\\"BINARY\\\\\\\"\\\\n },\\\\n \\\\\\\"operands\\\\\\\": [\\\\n {\\\\n \\\\\\\"op\\\\\\\": {\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"-\\\\\\\",\\\\n \\\\\\\"kind\\\\\\\": \\\\\\\"MINUS\\\\\\\",\\\\n \\\\\\\"syntax\\\\\\\": \\\\\\\"BINARY\\\\\\\"\\\\n },\\\\n \\\\\\\"operands\\\\\\\": [\\\\n {\\\\n \\\\\\\"input\\\\\\\": 0,\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"$0\\\\\\\"\\\\n },\\\\n {\\\\n \\\\\\\"literal\\\\\\\": 2,\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"INTEGER\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": false\\\\n }\\\\n }\\\\n 
],\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"BIGINT\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": true\\\\n }\\\\n },\\\\n {\\\\n \\\\\\\"literal\\\\\\\": 30,\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"INTEGER\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": false\\\\n }\\\\n }\\\\n ]\\\\n}\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\",\"age\",\"address\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, address, age], SCRIPT->AND(=($1, '671 Bristol Street'), =(-($2, 2), 30)), PROJECT->[firstname, age, address], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"{\\\\n \\\\\\\"op\\\\\\\": {\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"=\\\\\\\",\\\\n \\\\\\\"kind\\\\\\\": \\\\\\\"EQUALS\\\\\\\",\\\\n \\\\\\\"syntax\\\\\\\": \\\\\\\"BINARY\\\\\\\"\\\\n },\\\\n \\\\\\\"operands\\\\\\\": [\\\\n {\\\\n \\\\\\\"dynamicParam\\\\\\\": 0,\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"VARCHAR\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": true,\\\\n \\\\\\\"precision\\\\\\\": -1\\\\n }\\\\n },\\\\n {\\\\n \\\\\\\"dynamicParam\\\\\\\": 1,\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"VARCHAR\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": false,\\\\n \\\\\\\"precision\\\\\\\": -1\\\\n }\\\\n }\\\\n ]\\\\n}\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*,\"SOURCES\":[1,2],\"DIGESTS\":[\"address\",\"671 Bristol Street\"]}},\"boost\":1.0}},{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"{\\\\n \\\\\\\"op\\\\\\\": {\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"=\\\\\\\",\\\\n \\\\\\\"kind\\\\\\\": \\\\\\\"EQUALS\\\\\\\",\\\\n \\\\\\\"syntax\\\\\\\": \\\\\\\"BINARY\\\\\\\"\\\\n },\\\\n \\\\\\\"operands\\\\\\\": [\\\\n {\\\\n \\\\\\\"op\\\\\\\": {\\\\n \\\\\\\"name\\\\\\\": \\\\\\\"-\\\\\\\",\\\\n \\\\\\\"kind\\\\\\\": \\\\\\\"MINUS\\\\\\\",\\\\n \\\\\\\"syntax\\\\\\\": \\\\\\\"BINARY\\\\\\\"\\\\n },\\\\n \\\\\\\"operands\\\\\\\": [\\\\n {\\\\n \\\\\\\"dynamicParam\\\\\\\": 0,\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"BIGINT\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": true\\\\n }\\\\n },\\\\n {\\\\n \\\\\\\"dynamicParam\\\\\\\": 1,\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"INTEGER\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": false\\\\n }\\\\n }\\\\n ],\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"BIGINT\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": true\\\\n }\\\\n },\\\\n {\\\\n \\\\\\\"dynamicParam\\\\\\\": 2,\\\\n \\\\\\\"type\\\\\\\": {\\\\n \\\\\\\"type\\\\\\\": \\\\\\\"INTEGER\\\\\\\",\\\\n \\\\\\\"nullable\\\\\\\": false\\\\n }\\\\n }\\\\n ]\\\\n}\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*,\"SOURCES\":[0,2,2],\"DIGESTS\":[\"age\",2,30]}},\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\",\"age\",\"address\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n", "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) 
{\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n return v1stashed.scan();\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml index ee2254f5420..faaaa2018ef 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_complex_and_simple_expr.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..12=[{inputs}], expr#13=[+($t10, $t7)], expr#14=[1], expr#15=[+($t7, $t14)], proj#0..13=[{exprs}], $f14=[$t15]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST, balance ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA0HsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IGZhbHNlCn10AARleHBydADFewogICJvcCI6IHsKICAgICJuYW1lIjogIisiLAogICAgImtpbmQiOiAiUExVUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9LAogICAgewogICAgICAiaW5wdXQiOiAxLAogICAgICAibmFtZSI6ICIkMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAJ0AAdiYWxhbmNlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QAA2FnZX5xAH4ACnQAB0lOVEVHRVJ4eA==\"}","lang":"opensearch_compounded_script","params":{"NULL_DIRECTION":"FIRST","DIRECTION":"ASCENDING","utcTimestamp": 0}},"type":"number","order":"asc"}},{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT_EXPR->[+($10, $7) ASCENDING NULLS_FIRST, balance ASCENDING NULLS_FIRST], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRHsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":false,"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["age","balance"]}},"type":"number","order":"asc"}},{"balance":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json index 054bc203640..c9959ce01da 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_type_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$8])\n LogicalSort(sort0=[$17], dir0=[ASC-nulls-first])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], $f17=[SAFE_CAST($8)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT_EXPR->[SAFE_CAST($0) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAensKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQA3XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTQUZFX0NBU1QiLAogICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMCwKICAgICAgIm5hbWUiOiAiJDAiCiAgICB9CiAgXSwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAABdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3h4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"NULL_DIRECTION\":\"FIRST\",\"DIRECTION\":\"ASCENDING\",\"utcTimestamp\":*}},\"type\":\"number\",\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], SORT_EXPR->[SAFE_CAST($0) ASCENDING NULLS_FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXQBHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTQUZFX0NBU1QiLAogICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXSwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkRPVUJMRSIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfQp9\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"MISSING_MAX\":false,\"utcTimestamp\":*,\"SOURCES\":[0],\"DIGESTS\":[\"age\"]}},\"type\":\"number\",\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } -} \ No newline at end of file +} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml index c3f7ffe2a93..9eb6e726fbf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml @@ -5,4 +5,4 @@ calcite: LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAlnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Aap7CiAgIm9wIjogewogICAgIm5hbWUiOiAiSUxJS0UiLAogICAgImtpbmQiOiAiTElLRSIsCiAgICAic3ludGF4IjogIlNQRUNJQUwiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJpbnB1dCI6IDAsCiAgICAgICJuYW1lIjogIiQwIgogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAiJUhvbG1lcyUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogOAogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJcXCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAxCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+AAt4cH5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgARdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AGHhwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AGgAAAABzcQB+AAAAAAADdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogOAogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml index baf08f483a8..71a1e22775c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(info.city=[ITEM(GEOIP('my-datasource':VARCHAR, $0), 'city')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), info.city], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"info.city":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAknsKICAiZmllbGRzIjogWwogICAgewogICAgICAidWR0IjogIkVYUFJfSVAiLAogICAgICAidHlwZSI6ICJPVEhFUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogImhvc3QiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQETnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJVEVNIiwKICAgICJraW5kIjogIklURU0iLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiR0VPSVAiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAibXktZGF0YXNvdXJjZSIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiTUFQIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAia2V5IjogewogICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgIH0sCiAgICAgICAgInZhbHVlIjogewogICAgICAgICAgInR5cGUiOiAiQU5ZIiwKICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAgICAgInNjYWxlIjogLTIxNDc0ODM2NDgKICAgICAgICB9CiAgICAgIH0sCiAgICAgICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAgICAgImR5bmFtaWMiOiBmYWxzZQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAiY2l0eSIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiA0CiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AARob3N0fnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAACSVB4eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), info.city], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"info.city":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQErXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJVEVNIiwKICAgICJraW5kIjogIklURU0iLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiR0VPSVAiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ1ZHQiOiAiRVhQUl9JUCIsCiAgICAgICAgICAgICJ0eXBlIjogIk9USEVSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJNQVAiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJrZXkiOiB7CiAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgfSwKICAgICAgICAidmFsdWUiOiB7CiAgICAgICAgICAidHlwZSI6ICJBTlkiLAogICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICAgICAic2NhbGUiOiAtMjE0NzQ4MzY0OAogICAgICAgIH0KICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDQKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0,2],"DIGESTS":["my-datasource","host","city"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml index e7c0af846f0..6468a521e38 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eval_min.yaml @@ -6,4 +6,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[2], expr#19=[3], expr#20=['banana':VARCHAR], expr#21=[SCALAR_MIN($t17, $t18, $t19, $t8, $t20)], proj#0..10=[{exprs}], new=[$t21]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.json deleted file mode 100644 index 545630e2166..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(host=[$0])\n LogicalFilter(condition=[CIDRMATCH($0, 
'0.0.0.0/24':VARCHAR)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..11=[{inputs}], expr#12=['0.0.0.0/24':VARCHAR], expr#13=[CIDRMATCH($t0, $t12)], host=[$t0], $condition=[$t13])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.yaml new file mode 100644 index 00000000000..af7a44c2f6d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_script_ip_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(host=[$0]) + LogicalFilter(condition=[CIDRMATCH($0, '0.0.0.0/24':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..11=[{inputs}], expr#12=['0.0.0.0/24':VARCHAR], expr#13=[CIDRMATCH($t0, $t12)], host=[$t0], $condition=[$t13]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]]) diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_agg_with_sum_enhancement.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_agg_with_sum_enhancement.json deleted file mode 100644 index e965b5b5dc2..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_agg_with_sum_enhancement.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[sum(balance), sum(balance + 100), sum(balance - 100), sum(balance * 100), sum(balance / 100), gender]" - }, - "children": [ - { - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"sum(balance)\":{\"sum\":{\"field\":\"balance\"}},\"sum(balance + 
100)\":{\"sum\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IAMW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLlJlZmVyZW5jZUV4cHJlc3Npb26rRO9cEgeF1gIABEwABGF0dHJ0ABJMamF2YS9sYW5nL1N0cmluZztMAAVwYXRoc3EAfgABTAAHcmF3UGF0aHEAfgALTAAEdHlwZXEAfgAFeHB0AAdiYWxhbmNlc3IAGmphdmEudXRpbC5BcnJheXMkQXJyYXlMaXN02aQ8vs2IBtICAAFbAAFhdAATW0xqYXZhL2xhbmcvT2JqZWN0O3hwdXIAE1tMamF2YS5sYW5nLlN0cmluZzut0lbn6R17RwIAAHhwAAAAAXEAfgANcQB+AA1+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05Hc3IAL29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLkxpdGVyYWxFeHByZXNzaW9uRUIt8IzHgiQCAAFMAAlleHByVmFsdWV0AClMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3hwc3IALm9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLm1vZGVsLkV4cHJJbnRlZ2VyVmFsdWWmbAeZibduQwIAAHhyADVvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS5tb2RlbC5BYnN0cmFjdEV4cHJOdW1iZXJWYWx1ZTT0YurFpzI7AgABTAAFdmFsdWV0ABJMamF2YS9sYW5nL051bWJlcjt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwc3IAEWphdmEubGFuZy5JbnRlZ2VyEuKgpPeBhzgCAAFJAAV2YWx1ZXhyABBqYXZhLmxhbmcuTnVtYmVyhqyVHQuU4IsCAAB4cAAAAGR4c3IAM29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uTmFtZQuoOE3O9meXAgABTAAMZnVuY3Rpb25OYW1lcQB+AAt4cHQAAStxAH4ACXNyACFqYXZhLmxhbmcuaW52b2tlLlNlcmlhbGl6ZWRMYW1iZGFvYdCULCk2hQIACkkADmltcGxNZXRob2RLaW5kWwAMY2FwdHVyZWRBcmdzcQB+AA9MAA5jYXB0dXJpbmdDbGFzc3QAEUxqYXZhL2xhbmcvQ2xhc3M7TAAYZnVuY3Rpb25hbEludGVyZmFjZUNsYXNzcQB+AAtMAB1mdW5jdGlvbmFsSW50ZXJmYWNlTWV0aG9kTmFtZXEAfgALTAAiZnVuY3Rpb25hbEludGVyZmFjZU1ldGhvZFNpZ25hdHVyZXEAfgALTAAJaW1wbENsYXNzcQB+AAtMAA5pbXBsTWV0aG9kTmFtZXEAfgALTAATaW1wbE1ldGhvZFNpZ25hdHVyZXEAfgALTAAWaW5zdGFudGlhdGVkTWV0aG9kVHlwZXEAfgALeHAAAAAGdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAXNxAH4AJQAAAAZ1cQB+ACgAAAABc3EAfgAlAAAABnVxAH4AKAAAAAB2cgBEb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24ub3BlcmF0b3IuYXJ0aG1ldGljLkFyaXRobWV0aWNGdW5jdGlvbnMAAAAAAAAAAAAAAHhwdAA9b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbnQABWFwcGx5dAA4KExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDt0AERvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9vcGVyYXRvci9hcnRobWV0aWMvQXJpdGhtZXRpY0Z1bmN0aW9uc3QAGWxhbWJkYSRhZGRCYXNlJDdkOTBhYzAyJDR0AH0oTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgA1dnIAMm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uRFNMAAAAAAAAAAAAAAB4cHEAfgAwcQB+ADFxAH4AMnQAMm9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uRFNMdAAlbGFtYmRhJG51bGxNaXNzaW5nSGFuZGxpbmckYTUwMDUyODEkMXQAvChMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbjtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJja
C9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7cQB+ADVxAH4AN3QAPm9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZVRyaUZ1bmN0aW9ucQB+ADF0AEooTGphdmEvbGFuZy9PYmplY3Q7TGphdmEvbGFuZy9PYmplY3Q7TGphdmEvbGFuZy9PYmplY3Q7KUxqYXZhL2xhbmcvT2JqZWN0O3EAfgA4dAAWbGFtYmRhJGltcGwkYTBmYjM0ZDQkMXQA9yhMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbjtMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25Qcm9wZXJ0aWVzO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTt0ALgoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7cQB+ACNzcgA5b3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uRnVuY3Rpb25Qcm9wZXJ0aWVzzzxZY5uo+bMCAANMAA1jdXJyZW50Wm9uZUlkdAASTGphdmEvdGltZS9ab25lSWQ7TAAKbm93SW5zdGFudHQAE0xqYXZhL3RpbWUvSW5zdGFudDtMAAlxdWVyeVR5cGV0ACdMb3JnL29wZW5zZWFyY2gvc3FsL2V4ZWN1dG9yL1F1ZXJ5VHlwZTt4cHNyAA1qYXZhLnRpbWUuU2VylV2EuhsiSLIMAAB4cHcCCAB4c3EAfgBFdw0CAAAAAGiQdxM68QzgeH5yACVvcmcub3BlbnNlYXJjaC5zcWwuZXhlY3V0b3IuUXVlcnlUeXBlAAAAAAAAAAASAAB4cQB+ABR0AANQUExxAH4AFQ==\\\"}\",\"lang\":\"opensearch_compounded_script\"}}},\"sum(balance - 100)\":{\"sum\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IAMW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLlJlZmVyZW5jZUV4cHJlc3Npb26rRO9cEgeF1gIABEwABGF0dHJ0ABJMamF2YS9sYW5nL1N0cmluZztMAAVwYXRoc3EAfgABTAAHcmF3UGF0aHEAfgALTAAEdHlwZXEAfgAFeHB0AAdiYWxhbmNlc3IAGmphdmEudXRpbC5BcnJheXMkQXJyYXlMaXN02aQ8vs2IBtICAAFbAAFhdAATW0xqYXZhL2xhbmcvT2JqZWN0O3hwdXIAE1tMamF2YS5sYW5nLlN0cmluZzut0lbn6R17RwIAAHhwAAAAAXEAfgANcQB+AA1+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05Hc3IAL29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLkxpdGVyYWxFeHByZXNzaW9uRUIt8IzHgiQCAAFMAAlleHByVmFsdWV0AClMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3hwc3IALm9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLm1vZGVsLkV4cHJJbnRlZ2VyVmFsdWWmbAeZibduQwIAAHhyADVvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS5tb2RlbC5BYnN0cmFjdEV4cHJOdW1iZXJWYWx1ZTT0YurFpzI7AgABTAAFdmFsdWV0ABJMamF2YS9sYW5nL051bWJlcjt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwc3IAEWphdmEubGFuZy5JbnRlZ2VyEuKgpPeBhzgCAAFJAAV2YWx1ZXhyABBqYXZhLmxhbmcuTnVtYmVyhqyVHQuU4IsCAAB4cAAAAGR4c3IAM29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uTmFtZQuoOE3O9meXAgABTAAMZnVuY3Rpb25OYW1lcQB+AAt4cHQAAS1xAH4ACXNyACFqYXZhLmxhbmcuaW52b2tlLlNlcmlhbGl6ZWRMYW1iZGFvYdCULCk2hQIACkkADmltcGxNZXRob2RLaW5kWwAMY2FwdHVyZWRBcmdzcQB+A
A9MAA5jYXB0dXJpbmdDbGFzc3QAEUxqYXZhL2xhbmcvQ2xhc3M7TAAYZnVuY3Rpb25hbEludGVyZmFjZUNsYXNzcQB+AAtMAB1mdW5jdGlvbmFsSW50ZXJmYWNlTWV0aG9kTmFtZXEAfgALTAAiZnVuY3Rpb25hbEludGVyZmFjZU1ldGhvZFNpZ25hdHVyZXEAfgALTAAJaW1wbENsYXNzcQB+AAtMAA5pbXBsTWV0aG9kTmFtZXEAfgALTAATaW1wbE1ldGhvZFNpZ25hdHVyZXEAfgALTAAWaW5zdGFudGlhdGVkTWV0aG9kVHlwZXEAfgALeHAAAAAGdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAXNxAH4AJQAAAAZ1cQB+ACgAAAABc3EAfgAlAAAABnVxAH4AKAAAAAB2cgBEb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24ub3BlcmF0b3IuYXJ0aG1ldGljLkFyaXRobWV0aWNGdW5jdGlvbnMAAAAAAAAAAAAAAHhwdAA9b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbnQABWFwcGx5dAA4KExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDt0AERvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9vcGVyYXRvci9hcnRobWV0aWMvQXJpdGhtZXRpY0Z1bmN0aW9uc3QAHmxhbWJkYSRzdWJ0cmFjdEJhc2UkN2Q5MGFjMDIkNHQAfShMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7cQB+ADV2cgAyb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uRnVuY3Rpb25EU0wAAAAAAAAAAAAAAHhwcQB+ADBxAH4AMXEAfgAydAAyb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25EU0x0ACVsYW1iZGEkbnVsbE1pc3NpbmdIYW5kbGluZyRhNTAwNTI4MSQxdAC8KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4ANXEAfgA3dAA+b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlVHJpRnVuY3Rpb25xAH4AMXQASihMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDspTGphdmEvbGFuZy9PYmplY3Q7cQB+ADh0ABZsYW1iZGEkaW1wbCRhMGZiMzRkNCQxdAD3KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3QAuChMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25Qcm9wZXJ0aWVzO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AI3NyADlvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvblByb3BlcnRpZXPPPFljm6j5swIAA0wADWN1cnJlbnRab25lSWR0ABJMamF2YS90aW1lL1pvbmVJZDtMAApub3dJbnN0YW50dAATTGphdmEvdGltZS9JbnN0YW50O0wACXF1ZXJ5VHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhlY3V0b3IvUXVlcnlUeXBlO3hwc3IADWphdmEudGltZS5TZXKVXYS6GyJIsgwAAHhwdwIIAHhzcQB+AEV3DQIAAAAAaJB3EzrxDOB4fnIAJW9yZy5vcGVuc2VhcmNoLnNxbC5leGVjdXRvci5RdWVyeVR5cGUAAAAAAAAAABIAAHhxAH4AFHQAA1BQTHEAfgAV\\\"}\",\"lang\":\"opensearch_compounded_script\"}}},\"sum(balance * 
100)\":{\"sum\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IAMW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLlJlZmVyZW5jZUV4cHJlc3Npb26rRO9cEgeF1gIABEwABGF0dHJ0ABJMamF2YS9sYW5nL1N0cmluZztMAAVwYXRoc3EAfgABTAAHcmF3UGF0aHEAfgALTAAEdHlwZXEAfgAFeHB0AAdiYWxhbmNlc3IAGmphdmEudXRpbC5BcnJheXMkQXJyYXlMaXN02aQ8vs2IBtICAAFbAAFhdAATW0xqYXZhL2xhbmcvT2JqZWN0O3hwdXIAE1tMamF2YS5sYW5nLlN0cmluZzut0lbn6R17RwIAAHhwAAAAAXEAfgANcQB+AA1+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05Hc3IAL29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLkxpdGVyYWxFeHByZXNzaW9uRUIt8IzHgiQCAAFMAAlleHByVmFsdWV0AClMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3hwc3IALm9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLm1vZGVsLkV4cHJJbnRlZ2VyVmFsdWWmbAeZibduQwIAAHhyADVvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS5tb2RlbC5BYnN0cmFjdEV4cHJOdW1iZXJWYWx1ZTT0YurFpzI7AgABTAAFdmFsdWV0ABJMamF2YS9sYW5nL051bWJlcjt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwc3IAEWphdmEubGFuZy5JbnRlZ2VyEuKgpPeBhzgCAAFJAAV2YWx1ZXhyABBqYXZhLmxhbmcuTnVtYmVyhqyVHQuU4IsCAAB4cAAAAGR4c3IAM29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uTmFtZQuoOE3O9meXAgABTAAMZnVuY3Rpb25OYW1lcQB+AAt4cHQAASpxAH4ACXNyACFqYXZhLmxhbmcuaW52b2tlLlNlcmlhbGl6ZWRMYW1iZGFvYdCULCk2hQIACkkADmltcGxNZXRob2RLaW5kWwAMY2FwdHVyZWRBcmdzcQB+AA9MAA5jYXB0dXJpbmdDbGFzc3QAEUxqYXZhL2xhbmcvQ2xhc3M7TAAYZnVuY3Rpb25hbEludGVyZmFjZUNsYXNzcQB+AAtMAB1mdW5jdGlvbmFsSW50ZXJmYWNlTWV0aG9kTmFtZXEAfgALTAAiZnVuY3Rpb25hbEludGVyZmFjZU1ldGhvZFNpZ25hdHVyZXEAfgALTAAJaW1wbENsYXNzcQB+AAtMAA5pbXBsTWV0aG9kTmFtZXEAfgALTAATaW1wbE1ldGhvZFNpZ25hdHVyZXEAfgALTAAWaW5zdGFudGlhdGVkTWV0aG9kVHlwZXEAfgALeHAAAAAGdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAXNxAH4AJQAAAAZ1cQB+ACgAAAABc3EAfgAlAAAABnVxAH4AKAAAAAB2cgBEb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24ub3BlcmF0b3IuYXJ0aG1ldGljLkFyaXRobWV0aWNGdW5jdGlvbnMAAAAAAAAAAAAAAHhwdAA9b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbnQABWFwcGx5dAA4KExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDt0AERvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9vcGVyYXRvci9hcnRobWV0aWMvQXJpdGhtZXRpY0Z1bmN0aW9uc3QAHmxhbWJkYSRtdWx0aXBseUJhc2UkN2Q5MGFjMDIkNHQAfShMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7cQB+ADV2cgAyb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uRnVuY3Rpb25EU0wAAAAAAAAAAAAAAHhwcQB+ADBxAH4AMXEAfgAydAAyb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25EU0x0ACVsYW1iZGEkbnVsbE1pc3NpbmdIYW5kbGluZyRhNTAwNTI4MSQxdAC8KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc
2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4ANXEAfgA3dAA+b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlVHJpRnVuY3Rpb25xAH4AMXQASihMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDtMamF2YS9sYW5nL09iamVjdDspTGphdmEvbGFuZy9PYmplY3Q7cQB+ADh0ABZsYW1iZGEkaW1wbCRhMGZiMzRkNCQxdAD3KExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVCaUZ1bmN0aW9uO0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9GdW5jdGlvblByb3BlcnRpZXM7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3QAuChMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25Qcm9wZXJ0aWVzO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtxAH4AI3NyADlvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvblByb3BlcnRpZXPPPFljm6j5swIAA0wADWN1cnJlbnRab25lSWR0ABJMamF2YS90aW1lL1pvbmVJZDtMAApub3dJbnN0YW50dAATTGphdmEvdGltZS9JbnN0YW50O0wACXF1ZXJ5VHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZXhlY3V0b3IvUXVlcnlUeXBlO3hwc3IADWphdmEudGltZS5TZXKVXYS6GyJIsgwAAHhwdwIIAHhzcQB+AEV3DQIAAAAAaJB3EzrxDOB4fnIAJW9yZy5vcGVuc2VhcmNoLnNxbC5leGVjdXRvci5RdWVyeVR5cGUAAAAAAAAAABIAAHhxAH4AFHQAA1BQTHEAfgAV\\\"}\",\"lang\":\"opensearch_compounded_script\"}}},\"sum(balance / 100)\":{\"sum\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"rO0ABXNyADRvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5GdW5jdGlvbkRTTCQzHWCy3iOeynUCAAVMAA12YWwkYXJndW1lbnRzdAAQTGphdmEvdXRpbC9MaXN0O0wADHZhbCRmdW5jdGlvbnQAQExvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9mdW5jdGlvbi9TZXJpYWxpemFibGVUcmlGdW5jdGlvbjtMABB2YWwkZnVuY3Rpb25OYW1ldAA1TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uTmFtZTtMABZ2YWwkZnVuY3Rpb25Qcm9wZXJ0aWVzdAA7TG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMAA52YWwkcmV0dXJuVHlwZXQAJ0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJUeXBlO3hyADBvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5GdW5jdGlvbkV4cHJlc3Npb26yKjDT3HVqewIAAkwACWFyZ3VtZW50c3EAfgABTAAMZnVuY3Rpb25OYW1lcQB+AAN4cHNyABNqYXZhLnV0aWwuQXJyYXlMaXN0eIHSHZnHYZ0DAAFJAARzaXpleHAAAAACdwQAAAACc3IAMW9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLlJlZmVyZW5jZUV4cHJlc3Npb26rRO9cEgeF1gIABEwABGF0dHJ0ABJMamF2YS9sYW5nL1N0cmluZztMAAVwYXRoc3EAfgABTAAHcmF3UGF0aHEAfgALTAAEdHlwZXEAfgAFeHB0AAdiYWxhbmNlc3IAGmphdmEudXRpbC5BcnJheXMkQXJyYXlMaXN02aQ8vs2IBtICAAFbAAFhdAATW0xqYXZhL2xhbmcvT2JqZWN0O3hwdXIAE1tMamF2YS5sYW5nLlN0cmluZzut0lbn6R17RwIAAHhwAAAAAXEAfgANcQB+AA1+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05Hc3IAL29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLkxpdGVyYWxFeHByZXNzaW9uRUIt8IzHgiQCAAFMAAlleHByVmFsdWV0AClMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3hwc3IALm9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLm1vZGVsLkV4cHJJbnRlZ2VyVmFsdWWmbAeZibduQwIAAHhyADVvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS5tb2RlbC5BYnN0cmFjdEV4cHJOdW1iZXJWYWx1ZTT0YurFpzI7AgABTAAFdmFsdWV0ABJMamF2YS9sYW5nL051bWJlcjt4cgAvb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEubW9kZWwuQWJzdHJhY3RFeHByVmFsdWXJa7V2BhREigIAAHhwc3IAEWphdmEubGFuZy5JbnRlZ2VyEuKgpPeBhzgCAAFJAAV2YWx1ZXhyABBqYXZhLmxhbmcuTnVtYmVyhqyVHQuU4IsCAAB4cAAAAGR4c3IAM29yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uTmFtZQuoOE3O9meXAgABTAAMZnVuY3Rpb25OYW1lcQB+AAt4cHQAAS9xAH4ACXNyACFqYXZhLmxhbmcuaW52b2tlLlNlcmlhbGl6ZWRMYW1iZGFvYdCULCk2hQIACkkADmltcGxNZXRob2RLaW5kWwAMY2FwdHVyZWRBcmdzc
QB+AA9MAA5jYXB0dXJpbmdDbGFzc3QAEUxqYXZhL2xhbmcvQ2xhc3M7TAAYZnVuY3Rpb25hbEludGVyZmFjZUNsYXNzcQB+AAtMAB1mdW5jdGlvbmFsSW50ZXJmYWNlTWV0aG9kTmFtZXEAfgALTAAiZnVuY3Rpb25hbEludGVyZmFjZU1ldGhvZFNpZ25hdHVyZXEAfgALTAAJaW1wbENsYXNzcQB+AAtMAA5pbXBsTWV0aG9kTmFtZXEAfgALTAATaW1wbE1ldGhvZFNpZ25hdHVyZXEAfgALTAAWaW5zdGFudGlhdGVkTWV0aG9kVHlwZXEAfgALeHAAAAAGdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAXNxAH4AJQAAAAZ1cQB+ACgAAAABc3EAfgAlAAAABnVxAH4AKAAAAAB2cgBEb3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24ub3BlcmF0b3IuYXJ0aG1ldGljLkFyaXRobWV0aWNGdW5jdGlvbnMAAAAAAAAAAAAAAHhwdAA9b3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbnQABWFwcGx5dAA4KExqYXZhL2xhbmcvT2JqZWN0O0xqYXZhL2xhbmcvT2JqZWN0OylMamF2YS9sYW5nL09iamVjdDt0AERvcmcvb3BlbnNlYXJjaC9zcWwvZXhwcmVzc2lvbi9vcGVyYXRvci9hcnRobWV0aWMvQXJpdGhtZXRpY0Z1bmN0aW9uc3QAHGxhbWJkYSRkaXZpZGVCYXNlJDdkOTBhYzAyJDR0AH0oTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlOylMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO3EAfgA1dnIAMm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLkZ1bmN0aW9uRFNMAAAAAAAAAAAAAAB4cHEAfgAwcQB+ADFxAH4AMnQAMm9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uRFNMdAAlbGFtYmRhJG51bGxNaXNzaW5nSGFuZGxpbmckYTUwMDUyODEkMXQAvChMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbjtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7cQB+ADVxAH4AN3QAPm9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL1NlcmlhbGl6YWJsZVRyaUZ1bmN0aW9ucQB+ADF0AEooTGphdmEvbGFuZy9PYmplY3Q7TGphdmEvbGFuZy9PYmplY3Q7TGphdmEvbGFuZy9PYmplY3Q7KUxqYXZhL2xhbmcvT2JqZWN0O3EAfgA4dAAWbGFtYmRhJGltcGwkYTBmYjM0ZDQkMXQA9yhMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vU2VyaWFsaXphYmxlQmlGdW5jdGlvbjtMb3JnL29wZW5zZWFyY2gvc3FsL2V4cHJlc3Npb24vZnVuY3Rpb24vRnVuY3Rpb25Qcm9wZXJ0aWVzO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7TG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTspTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL21vZGVsL0V4cHJWYWx1ZTt0ALgoTG9yZy9vcGVuc2VhcmNoL3NxbC9leHByZXNzaW9uL2Z1bmN0aW9uL0Z1bmN0aW9uUHJvcGVydGllcztMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvbW9kZWwvRXhwclZhbHVlO0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7KUxvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS9tb2RlbC9FeHByVmFsdWU7cQB+ACNzcgA5b3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uRnVuY3Rpb25Qcm9wZXJ0aWVzzzxZY5uo+bMCAANMAA1jdXJyZW50Wm9uZUlkdAASTGphdmEvdGltZS9ab25lSWQ7TAAKbm93SW5zdGFudHQAE0xqYXZhL3RpbWUvSW5zdGFudDtMAAlxdWVyeVR5cGV0ACdMb3JnL29wZW5zZWFyY2gvc3FsL2V4ZWN1dG9yL1F1ZXJ5VHlwZTt4cHNyAA1qYXZhLnRpbWUuU2VylV2EuhsiSLIMAAB4cHcCCAB4c3EAfgBFdw0CAAAAAGiQdxM68QzgeH5yACVvcmcub3BlbnNlYXJjaC5zcWwuZXhlY3V0b3IuUXVlcnlUeXBlAAAAAAAAAAASAAB4cQB+ABR0AANQUExxAH4AFQ==\\\"}\",\"lang\":\"opensearch_compounded_script\"}}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - } - ] - } -} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3595.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3595.yml index 581b1b2ca39..84fd0047d9c 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3595.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/3595.yml @@ -43,7 +43,7 @@ teardown: Content-Type: 'application/json' ppl: body: - query: 'source=logs | lookup region_info regionId' + query: 'source=logs | lookup region_info regionId | sort 
regionId' - match: {"total": 1} - match: {"schema": [{"name": "action", "type": "string"}, {"name": "regionId", "type": "string"}, {"name": "timestamp", "type": "timestamp"}, {"name": "regionName", "type": "string"}]} - match: {"datarows": [["login_attempt", "1", "2024-04-29 10:00:00", "us-east-1"]]} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GeoIpFunction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GeoIpFunction.java index 001ed064d1d..83b1915f6b5 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GeoIpFunction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GeoIpFunction.java @@ -90,6 +90,12 @@ public Expression implement( dataSource, ipAddress.toString(), Collections.emptySet(), nodeClient); } + public static Map fetchIpEnrichment( + String dataSource, Object ipAddress, NodeClient nodeClient) { + return fetchIpEnrichment( + dataSource, ipAddress.toString(), Collections.emptySet(), nodeClient); + } + public static Map fetchIpEnrichment( String dataSource, ExprIpValue ipAddress, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 94134ae0b4b..7abb0c1b937 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -52,14 +52,12 @@ import java.util.Collections; import java.util.GregorianCalendar; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.function.Supplier; import lombok.Getter; -import org.apache.calcite.DataContext.Variable; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; @@ -69,7 +67,6 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUnknownAs; import org.apache.calcite.rex.RexVisitorImpl; -import org.apache.calcite.runtime.Hook; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlSyntax; @@ -108,6 +105,7 @@ import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.QueryStringQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.SimpleQueryStringQuery; import org.opensearch.sql.opensearch.storage.serde.RelJsonSerializer; +import org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper; import org.opensearch.sql.opensearch.storage.serde.SerializationWrapper; /** @@ -1449,9 +1447,9 @@ private static String ipValueForPushDown(String value) { public static class ScriptQueryExpression extends QueryExpression { private RexNode analyzedNode; - // use lambda to generate code lazily to avoid store generated code private final Supplier codeGenerator; - private final Map params; + private String generatedCode; + private final ScriptParameterHelper parameterHelper; public ScriptQueryExpression( RexNode rexNode, @@ -1468,11 +1466,21 @@ public ScriptQueryExpression( } accumulateScriptCount(1); RelJsonSerializer serializer = new RelJsonSerializer(cluster); + this.parameterHelper = new ScriptParameterHelper(rowType.getFieldList(), fieldTypes, params); this.codeGenerator = () -> SerializationWrapper.wrapWithLangType( - ScriptEngineType.CALCITE, 
serializer.serialize(rexNode, rowType, fieldTypes)); - this.params = params; + ScriptEngineType.CALCITE, serializer.serialize(rexNode, parameterHelper)); + } + + // For the filter script, this method is called after the planning phase; + // for the agg-script, it is called during planning to generate the agg builder. + // TODO: make the agg-script lazy as well + private String getOrCreateGeneratedCode() { + if (generatedCode == null) { + generatedCode = codeGenerator.get(); + } + return generatedCode; + } @Override @@ -1481,19 +1489,12 @@ public QueryBuilder builder() { } public Script getScript() { - long currentTime = Hook.CURRENT_TIME.get(-1L); - if (currentTime < 0) { - throw new UnsupportedScriptException( - "ScriptQueryExpression requires a valid current time from hook, but it is not set"); - } - Map mergedParams = new LinkedHashMap<>(params); - mergedParams.put(Variable.UTC_TIMESTAMP.camelName, currentTime); return new Script( DEFAULT_SCRIPT_TYPE, COMPOUNDED_LANG_NAME, - codeGenerator.get(), + getOrCreateGeneratedCode(), Collections.emptyMap(), - mergedParams); + this.parameterHelper.getParameters()); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 3f6b53b6c59..88f15607a7a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.externalize.RelWriterImpl; import org.apache.calcite.rel.hint.RelHint; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.metadata.RelMdUtil; @@ -94,7 +95,11 @@ public RelDataType deriveRowType() { @Override public RelWriter explainTerms(RelWriter pw) { - String explainString = pushDownContext + ", " + pushDownContext.getRequestBuilder(); + String explainString = String.valueOf(pushDownContext); + if (pw instanceof RelWriterImpl) { + // Only add the request builder to the explain plan + explainString += ", " + pushDownContext.createRequestBuilder(); + } return super.explainTerms(pw) .itemIf("PushDownContext", explainString, !pushDownContext.isEmpty()); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index 20d8a6c34fd..bc5a289f465 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -112,7 +112,7 @@ public Result implement(EnumerableRelImplementor implementor, Prefer pref) { return new AbstractEnumerable<>() { @Override public Enumerator enumerator() { - OpenSearchRequestBuilder requestBuilder = getOrCreateRequestBuilder(); + OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); return new OpenSearchIndexEnumerator( osIndex.getClient(), getFieldPath(), @@ -129,28 +129,4 @@ private List getFieldPath() { .map(f -> osIndex.getAliasMapping().getOrDefault(f, f)) .toList(); } - - /** - * In some edge cases where the digests of more than one scan are the same, 
and then the Calcite - * planner will reuse the same scan along with the same PushDownContext inner it. However, the - * `OpenSearchRequestBuilder` inner `PushDownContext` is not reusable since it has status changed - * in the search process. - * - *

To avoid this issue and try to construct `OpenSearchRequestBuilder` as less as possible, - * this method will get and reuse the `OpenSearchRequestBuilder` in PushDownContext for the first - * time, and then construct new ones for the following invoking. - * - * @return OpenSearchRequestBuilder to be used by enumerator - */ - private volatile boolean isRequestBuilderUsedByEnumerator = false; - - private OpenSearchRequestBuilder getOrCreateRequestBuilder() { - synchronized (this.pushDownContext) { - if (isRequestBuilderUsedByEnumerator) { - return this.pushDownContext.createRequestBuilder(); - } - isRequestBuilderUsedByEnumerator = true; - return this.pushDownContext.getRequestBuilder(); - } - } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 073a4f1f29f..bf79f803544 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -5,6 +5,8 @@ package org.opensearch.sql.opensearch.storage.scan; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.MISSING_MAX; + import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -50,7 +52,6 @@ import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; -import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -480,9 +481,12 @@ public CalciteLogicalIndexScan pushdownSortExpr(List sortExprDig } RexNode sortExpr = digest.getExpression(); assert sortExpr instanceof RexCall : "sort expression should be RexCall"; - Map directionParams = new LinkedHashMap<>(); - directionParams.put(PlanUtils.NULL_DIRECTION, digest.getNullDirection().name()); - directionParams.put(PlanUtils.DIRECTION, digest.getDirection().name()); + Map missingValueParams = + new LinkedHashMap<>() { + { + put(MISSING_MAX, digest.isMissingMax()); + } + }; // Complex expression - use ScriptQueryExpression to generate script for sort PredicateAnalyzer.ScriptQueryExpression scriptExpr = new PredicateAnalyzer.ScriptQueryExpression( @@ -490,7 +494,7 @@ public CalciteLogicalIndexScan pushdownSortExpr(List sortExprDig rowType, osIndex.getAllFieldTypes(), getCluster(), - directionParams); + missingValueParams); // Determine the correct ScriptSortType based on the expression's return type ScriptSortType sortType = getScriptSortType(sortExpr.getType()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AbstractAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AbstractAction.java index 65ef6233ffb..9ddba2f970b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AbstractAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AbstractAction.java @@ -14,10 +14,10 @@ public interface AbstractAction { void apply(T target); /** - * Apply the action on the target T and add the operation to the context + * Add the operation to the context * * @param context the context to add the operation to * @param operation the operation to 
add to the context */ - void transform(PushDownContext context, PushDownOperation operation); + void pushOperation(PushDownContext context, PushDownOperation operation); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java index cd3e84bf7cf..f2362e3c1cc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java @@ -7,13 +7,9 @@ /** A lambda action to apply on the {@link AggPushDownAction} */ public interface AggregationBuilderAction extends AbstractAction { - /** - * Apply the action on the target {@link AggPushDownAction} and add the operation to the context - * - * @param context the context to add the operation to - * @param operation the operation to add to the context - */ - default void transform(PushDownContext context, PushDownOperation operation) { + default void pushOperation(PushDownContext context, PushDownOperation operation) { + // Apply transformation to aggregation builder in the optimization phase as some transformation + // may cause exception. We need to detect that exception in advance. apply(context.getAggPushDownAction()); context.getOperationsForAgg().add(operation); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java index bba33883b49..6abe97266f7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java @@ -9,15 +9,7 @@ /** A lambda action to apply on the {@link OpenSearchRequestBuilder} */ public interface OSRequestBuilderAction extends AbstractAction { - /** - * Apply the action on the target {@link OpenSearchRequestBuilder} and add the operation to the - * context - * - * @param context the context to add the operation to - * @param operation the operation to add to the context - */ - default void transform(PushDownContext context, PushDownOperation operation) { - apply(context.getRequestBuilder()); + default void pushOperation(PushDownContext context, PushDownOperation operation) { context.getOperationsForRequestBuilder().add(operation); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 4a2ade440cf..29700fd6606 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -13,19 +13,22 @@ import lombok.Getter; import org.jetbrains.annotations.NotNull; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; +import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder.PushDownUnSupportedException; import org.opensearch.sql.opensearch.storage.OpenSearchIndex; /** Push down context is used to store all the push down operations that are applied to the query */ @Getter public class PushDownContext extends AbstractCollection { private final 
OpenSearchIndex osIndex; - private final OpenSearchRequestBuilder requestBuilder; private ArrayDeque operationsForRequestBuilder; private boolean isAggregatePushed = false; private AggPushDownAction aggPushDownAction; private ArrayDeque operationsForAgg; + // Records the start position of the query, which is updated by newly added limit operations. + private int startFrom = 0; + private boolean isLimitPushed = false; private boolean isProjectPushed = false; private boolean isMeasureOrderPushed = false; @@ -36,7 +39,6 @@ public class PushDownContext { public PushDownContext(OpenSearchIndex osIndex) { this.osIndex = osIndex; - this.requestBuilder = osIndex.createRequestBuilder(); } @Override @@ -95,12 +97,19 @@ ArrayDeque getOperationsForAgg() { @Override public boolean add(PushDownOperation operation) { - operation.action().transform(this, operation); + operation.action().pushOperation(this, operation); if (operation.type() == PushDownType.AGGREGATION) { isAggregatePushed = true; this.aggPushDownAction = (AggPushDownAction) operation.action(); } if (operation.type() == PushDownType.LIMIT) { + startFrom += ((LimitDigest) operation.digest()).offset(); + if (startFrom >= osIndex.getMaxResultWindow()) { + throw new PushDownUnSupportedException( + String.format( + "[INTERNAL] Requested offset %d should be less than the max result window %d", + startFrom, osIndex.getMaxResultWindow())); + } isLimitPushed = true; if (isSortPushed || isMeasureOrderPushed || isSortExprPushed) { isTopKPushed = true; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java index 5b7fb1db320..cde43634ae2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/SortExprDigest.java @@ -9,6 +9,8 @@ import lombok.AllArgsConstructor; import lombok.Getter; import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelFieldCollation.Direction; +import org.apache.calcite.rel.RelFieldCollation.NullDirection; import org.apache.calcite.rex.RexNode; import org.apache.commons.lang3.StringUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; @@ -102,4 +104,8 @@ public String toString() { return String.format( "%s %s NULLS_%s", sortTarget, direction.toString(), nullDirection.toString()); } + + public boolean isMissingMax() { + return (direction == Direction.ASCENDING) ^ (nullDirection == NullDirection.FIRST); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java index 1644d9d0509..c1e4f5535bf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/CalciteScriptEngine.java @@ -27,42 +27,36 @@ package org.opensearch.sql.opensearch.storage.script; -import static org.opensearch.sql.data.type.ExprCoreType.BYTE; -import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; -import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; -import static org.opensearch.sql.data.type.ExprCoreType.IP; -import static org.opensearch.sql.data.type.ExprCoreType.SHORT; +import static 
org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.DIGESTS; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.SOURCES; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import java.lang.reflect.Type; import java.time.chrono.ChronoZonedDateTime; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.BiFunction; +import lombok.Getter; import lombok.RequiredArgsConstructor; import org.apache.calcite.DataContext; -import org.apache.calcite.adapter.enumerable.EnumUtils; import org.apache.calcite.adapter.enumerable.PhysType; import org.apache.calcite.adapter.enumerable.RexToLixTranslator; -import org.apache.calcite.adapter.enumerable.RexToLixTranslator.InputGetter; import org.apache.calcite.adapter.java.JavaTypeFactory; import org.apache.calcite.config.CalciteSystemProperty; import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.linq4j.QueryProvider; import org.apache.calcite.linq4j.function.Function1; import org.apache.calcite.linq4j.tree.BlockBuilder; -import org.apache.calcite.linq4j.tree.Expression; import org.apache.calcite.linq4j.tree.Expressions; import org.apache.calcite.linq4j.tree.LabelTarget; -import org.apache.calcite.linq4j.tree.MethodCallExpression; import org.apache.calcite.linq4j.tree.MethodDeclaration; import org.apache.calcite.linq4j.tree.ParameterExpression; -import org.apache.calcite.linq4j.tree.Types; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelRecordType; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexExecutable; import org.apache.calcite.rex.RexNode; @@ -82,12 +76,7 @@ import org.opensearch.script.ScriptEngine; import org.opensearch.script.StringSortScript; import org.opensearch.search.lookup.SourceLookup; -import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; -import org.opensearch.sql.data.model.ExprIpValue; import org.opensearch.sql.data.model.ExprTimestampValue; -import org.opensearch.sql.data.type.ExprCoreType; -import org.opensearch.sql.data.type.ExprType; -import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.storage.script.aggregation.CalciteAggregationScriptFactory; import org.opensearch.sql.opensearch.storage.script.filter.CalciteFilterScriptFactory; import org.opensearch.sql.opensearch.storage.script.sort.CalciteNumberSortScriptFactory; @@ -131,17 +120,19 @@ public String getType() { @Override public T compile( String scriptName, String scriptCode, ScriptContext context, Map options) { - Map objectMap = relJsonSerializer.deserialize(scriptCode); - RexNode rexNode = (RexNode) objectMap.get(RelJsonSerializer.EXPR); - RelDataType rowType = (RelDataType) objectMap.get(RelJsonSerializer.ROW_TYPE); - Map fieldTypes = - (Map) objectMap.get(RelJsonSerializer.FIELD_TYPES); - - JavaTypeFactory typeFactory = OpenSearchTypeFactory.TYPE_FACTORY; - RexToLixTranslator.InputGetter getter = new ScriptInputGetter(typeFactory, rowType, fieldTypes); + RexNode rexNode = relJsonSerializer.deserialize(scriptCode); + + RexToLixTranslator.InputGetter getter = + (blockBuilder, i, type) -> { + throw new UnsupportedScriptException( + "[BUG]There shouldn't be RexInputRef in the RexNode."); + }; String code = CalciteScriptEngine.translate( - relJsonSerializer.getCluster().getRexBuilder(), 
List.of(rexNode), getter, rowType); + relJsonSerializer.getCluster().getRexBuilder(), + List.of(rexNode), + getter, + new RelRecordType(List.of())); Function1 function = new RexExecutable(code, "generated Rex code").getFunction(); @@ -172,76 +163,26 @@ public UnsupportedScriptException(Throwable cause) { } } - /** - * Implementation of {@link org.apache.calcite.adapter.enumerable.RexToLixTranslator.InputGetter} - * that reads the values of input fields by calling - * {@link org.apache.calcite.DataContext#get}("inputRecord"). - */ - public static class ScriptInputGetter implements InputGetter { - private final RelDataTypeFactory typeFactory; - private final RelDataType rowType; - private final Map fieldTypes; - - public ScriptInputGetter( - RelDataTypeFactory typeFactory, RelDataType rowType, Map fieldTypes) { - this.typeFactory = typeFactory; - this.rowType = rowType; - this.fieldTypes = fieldTypes; - } - - @Override - public org.apache.calcite.linq4j.tree.Expression field( - BlockBuilder list, int index, @Nullable Type storageType) { - String fieldName = rowType.getFieldList().get(index).getName(); - ExprType exprType = fieldTypes.get(fieldName); - String referenceField = OpenSearchTextType.toKeywordSubField(fieldName, exprType); - MethodCallExpression fieldValueExpr = - // Have to invoke `getFromSource` if the field is the text without keyword or struct - (referenceField == null || exprType == ExprCoreType.STRUCT) - ? Expressions.call( - EnumUtils.convert(DataContext.ROOT, ScriptDataContext.class), - Types.lookupMethod(ScriptDataContext.class, "getFromSource", String.class), - Expressions.constant(fieldName)) - : Expressions.call( - DataContext.ROOT, - BuiltInMethod.DATA_CONTEXT_GET.method, - Expressions.constant(referenceField)); - if (storageType == null) { - final RelDataType fieldType = rowType.getFieldList().get(index).getType(); - storageType = ((JavaTypeFactory) typeFactory).getJavaClass(fieldType); - } - return EnumUtils.convert(tryConvertDocValue(fieldValueExpr, exprType), storageType); - } - - /** - * DocValue only support long and double for integer and float, cast to the related type first - */ - private Expression tryConvertDocValue(Expression docValueExpr, ExprType exprType) { - return switch (exprType) { - case INTEGER, SHORT, BYTE -> EnumUtils.convert(docValueExpr, Long.class); - case FLOAT -> EnumUtils.convert(docValueExpr, Double.class); - // IP is scanned in as a string but used as ExprIpValue later. We call the constructor - // beforehand. 
- case IP -> Expressions.new_( - ExprIpValue.class, EnumUtils.convert(docValueExpr, String.class)); - default -> docValueExpr; - }; - } - } - public static class ScriptDataContext implements DataContext { private final Map> docProvider; private final SourceLookup sourceLookup; - private final Map params; + private final long utcTimestamp; + private final List sources; + private final List digests; + private final Map parameterToIndex; public ScriptDataContext( Map> docProvider, SourceLookup sourceLookup, - Map params) { + Map params, + Map parameterToIndex) { this.docProvider = docProvider; this.sourceLookup = sourceLookup; - this.params = params; + this.utcTimestamp = (long) params.get(Variable.UTC_TIMESTAMP.camelName); + this.sources = ((List) params.get(SOURCES)).stream().map(Source::fromValue).toList(); + this.digests = (List) params.get(DIGESTS); + this.parameterToIndex = parameterToIndex; } @Override @@ -262,9 +203,21 @@ public QueryProvider getQueryProvider() { @Override public Object get(String name) { // UTC_TIMESTAMP is a special variable used for some time related functions. - if (Variable.UTC_TIMESTAMP.camelName.equals(name)) - return params.get(Variable.UTC_TIMESTAMP.camelName); + if (Variable.UTC_TIMESTAMP.camelName.equals(name)) return this.utcTimestamp; + + try { + int index = parameterToIndex.get(name); + return switch (sources.get(index)) { + case DOC_VALUE -> getFromDocValue((String) digests.get(index)); + case SOURCE -> getFromSource((String) digests.get(index)); + case LITERAL -> digests.get(index); + }; + } catch (Exception e) { + throw new IllegalStateException("Failed to get value for parameter " + name); + } + } + public Object getFromDocValue(String name) { ScriptDocValues docValue = this.docProvider.get(name); if (docValue == null || docValue.isEmpty()) { return null; // No way to differentiate null and missing from doc value @@ -285,6 +238,35 @@ public Object getFromSource(String name) { } } + @Getter + public enum Source { + DOC_VALUE(0), + SOURCE(1), + LITERAL(2); + + private final int value; + + Source(int value) { + this.value = value; + } + + private static final Map VALUE_TO_SOURCE = new HashMap<>(); + + static { + for (Source source : Source.values()) { + VALUE_TO_SOURCE.put(source.value, source); + } + } + + public static Source fromValue(int value) { + Source source = VALUE_TO_SOURCE.get(value); + if (source == null) { + throw new IllegalArgumentException("No Source with value: " + value); + } + return source; + } + } + /** * This function is copied from Calcite RexExecutorImpl It's used to compile RexNode expression to * java code string. 
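The compile/DataContext changes above drop the per-field ScriptInputGetter: a deserialized script no longer reads named input columns, it resolves positional parameter names such as "?0" through parameterToIndex against the SOURCES and DIGESTS lists carried in the script params. A minimal standalone sketch of that lookup, with hypothetical parameter values (this is not the plugin's API, only the resolution scheme):

import java.util.List;
import java.util.Map;

public class ParameterResolutionSketch {
  public static void main(String[] args) {
    // Source ordinals mirror the enum above: 0 = DOC_VALUE, 1 = SOURCE, 2 = LITERAL.
    List<Integer> sources = List.of(0, 2); // "?0" reads a doc value, "?1" is a literal
    List<Object> digests = List.of("balance", 100); // field name for "?0", constant for "?1"
    Map<String, Integer> parameterToIndex = Map.of("?0", 0, "?1", 1);

    for (Map.Entry<String, Integer> entry : parameterToIndex.entrySet()) {
      int index = entry.getValue();
      Object resolved =
          switch (sources.get(index)) {
            case 0 -> "docValues[" + digests.get(index) + "]"; // cf. getFromDocValue
            case 1 -> "_source[" + digests.get(index) + "]"; // cf. getFromSource
            default -> digests.get(index); // literals are carried inline
          };
      System.out.println(entry.getKey() + " -> " + resolved);
    }
  }
}

Only DOC_VALUE and SOURCE entries touch the document at search time; LITERAL digests come back as-is, which fits the serializer below no longer shipping a row type with the script.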
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java index 8809513a2ad..83ec6093718 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java @@ -37,21 +37,26 @@ class CalciteAggregationScript extends AggregationScript { private final RelDataType type; + private final Map parametersToIndex; + public CalciteAggregationScript( Function1 function, RelDataType type, SearchLookup lookup, LeafReaderContext context, - Map params) { + Map params, + Map parametersToIndex) { super(params, lookup, context); this.calciteScript = new CalciteScript(function, params); this.sourceLookup = lookup.getLeafSearchLookup(context).source(); this.type = type; + this.parametersToIndex = parametersToIndex; } @Override public Object execute() { - Object value = calciteScript.execute(this.getDoc(), this.sourceLookup)[0]; + Object value = + calciteScript.execute(this.getDoc(), this.sourceLookup, this.parametersToIndex)[0]; ExprType exprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(type); // See logic in {@link ExpressionAggregationScript::execute} return switch ((ExprCoreType) exprType) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java index 8555b0a82a7..b0b0914e072 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScriptLeafFactory.java @@ -5,7 +5,12 @@ package org.opensearch.sql.opensearch.storage.script.aggregation; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.SOURCES; + +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import org.apache.calcite.DataContext; import org.apache.calcite.linq4j.function.Function1; import org.apache.calcite.rel.type.RelDataType; @@ -25,6 +30,12 @@ class CalciteAggregationScriptLeafFactory implements AggregationScript.LeafFacto /** Document lookup that returns doc values. */ private final SearchLookup lookup; + /** + * Maps parameter names to their actual indices in SOURCES. Generated in advance in the factory + * to avoid repeating the work for each document. + */ + private final Map parametersToIndex; + public CalciteAggregationScriptLeafFactory( Function1 function, RelDataType type, @@ -34,11 +45,15 @@ public CalciteAggregationScriptLeafFactory( this.type = type; this.params = params; this.lookup = lookup; + this.parametersToIndex = + IntStream.range(0, ((List) params.get(SOURCES)).size()) + .boxed() + .collect(Collectors.toMap(i -> "?" 
+ i, i -> i)); } @Override public AggregationScript newInstance(LeafReaderContext ctx) { - return new CalciteAggregationScript(function, type, lookup, ctx, params); + return new CalciteAggregationScript(function, type, lookup, ctx, params, parametersToIndex); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java index 4568fddc8ed..ae79f18fa45 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java @@ -40,9 +40,14 @@ public CalciteScript(Function1 function, Map> docProvider, SourceLookup sourceLookup) { + public Object[] execute( + Map> docProvider, + SourceLookup sourceLookup, + Map parametersToIndex) { return AccessController.doPrivileged( (PrivilegedAction) - () -> function.apply(new ScriptDataContext(docProvider, sourceLookup, params))); + () -> + function.apply( + new ScriptDataContext(docProvider, sourceLookup, params, parametersToIndex))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScript.java index 90f5af66481..3d8f82eb638 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScript.java @@ -27,20 +27,25 @@ class CalciteFilterScript extends FilterScript { private final SourceLookup sourceLookup; + private final Map parametersToIndex; + public CalciteFilterScript( Function1 function, SearchLookup lookup, LeafReaderContext context, - Map params) { + Map params, + Map parametersToIndex) { super(params, lookup, context); this.calciteScript = new CalciteScript(function, params); // TODO: we'd better get source from the leafLookup of super once it's available this.sourceLookup = lookup.getLeafSearchLookup(context).source(); + this.parametersToIndex = parametersToIndex; } @Override public boolean execute() { - Object result = calciteScript.execute(this.getDoc(), this.sourceLookup)[0]; + Object result = + calciteScript.execute(this.getDoc(), this.sourceLookup, this.parametersToIndex)[0]; // The result should be type of BOOLEAN_NULLABLE. 
Treat it as false if null return result != null && (boolean) result; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java index 93f538a50b6..0158e593902 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/CalciteFilterScriptLeafFactory.java @@ -5,7 +5,12 @@ package org.opensearch.sql.opensearch.storage.script.filter; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.SOURCES; + +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import org.apache.calcite.DataContext; import org.apache.calcite.linq4j.function.Function1; import org.apache.lucene.index.LeafReaderContext; @@ -23,15 +28,25 @@ class CalciteFilterScriptLeafFactory implements FilterScript.LeafFactory { /** Document lookup that returns doc values. */ private final SearchLookup lookup; + /** + * Maps parameter names to their actual indices in SOURCES. Generated in advance in the factory + * to avoid repeating the work for each document. + */ + private final Map parametersToIndex; + public CalciteFilterScriptLeafFactory( Function1 function, Map params, SearchLookup lookup) { this.function = function; this.params = params; this.lookup = lookup; + this.parametersToIndex = + IntStream.range(0, ((List) params.get(SOURCES)).size()) + .boxed() + .collect(Collectors.toMap(i -> "?" + i, i -> i)); } @Override public FilterScript newInstance(LeafReaderContext ctx) { - return new CalciteFilterScript(function, lookup, ctx, params); + return new CalciteFilterScript(function, lookup, ctx, params, parametersToIndex); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java index f368ac184eb..0697d3f25a2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScript.java @@ -5,17 +5,16 @@ package org.opensearch.sql.opensearch.storage.script.sort; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.MISSING_MAX; + import java.util.Map; import lombok.EqualsAndHashCode; import org.apache.calcite.DataContext; import org.apache.calcite.linq4j.function.Function1; -import org.apache.calcite.rel.RelFieldCollation.Direction; -import org.apache.calcite.rel.RelFieldCollation.NullDirection; import org.apache.lucene.index.LeafReaderContext; import org.opensearch.script.NumberSortScript; import org.opensearch.search.lookup.SearchLookup; import org.opensearch.search.lookup.SourceLookup; -import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.script.core.CalciteScript; /** Calcite number sort script. 
*/ @@ -26,38 +25,32 @@ public class CalciteNumberSortScript extends NumberSortScript { private final CalciteScript calciteScript; private final SourceLookup sourceLookup; - private final Direction direction; - private final NullDirection nullDirection; + private final boolean missingMax; + private final Map parametersToIndex; public CalciteNumberSortScript( Function1 function, SearchLookup lookup, LeafReaderContext context, - Map params) { + Map params, + Map parametersToIndex) { super(params, lookup, context); this.calciteScript = new CalciteScript(function, params); // TODO: we'd better get source from the leafLookup of super once it's available this.sourceLookup = lookup.getLeafSearchLookup(context).source(); - this.direction = - params.containsKey(PlanUtils.DIRECTION) - ? Direction.valueOf((String) params.get(PlanUtils.DIRECTION)) - : Direction.ASCENDING; - this.nullDirection = - params.containsKey(PlanUtils.NULL_DIRECTION) - ? NullDirection.valueOf((String) params.get(PlanUtils.NULL_DIRECTION)) - : NullDirection.FIRST; + this.parametersToIndex = parametersToIndex; + this.missingMax = (Boolean) params.getOrDefault(MISSING_MAX, false); } @Override public double execute() { - Object value = calciteScript.execute(this.getDoc(), this.sourceLookup)[0]; + Object value = + calciteScript.execute(this.getDoc(), this.sourceLookup, this.parametersToIndex)[0]; // There is a limitation here when the Double value is exactly theoretical min/max value. // It can't distinguish the ordering between null and exact Double.NEGATIVE_INFINITY or // Double.NaN. if (value == null) { - boolean isAscending = direction == Direction.ASCENDING; - boolean isNullFirst = nullDirection == NullDirection.FIRST; - return isAscending == isNullFirst ? Double.NEGATIVE_INFINITY : Double.NaN; + return this.missingMax ? Double.NaN : Double.NEGATIVE_INFINITY; } return ((Number) value).doubleValue(); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java index 703a8946e43..f3c94162cdc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteNumberSortScriptLeafFactory.java @@ -5,8 +5,13 @@ package org.opensearch.sql.opensearch.storage.script.sort; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.SOURCES; + import java.io.IOException; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import lombok.EqualsAndHashCode; import org.apache.calcite.DataContext; import org.apache.calcite.linq4j.function.Function1; @@ -26,16 +31,26 @@ class CalciteNumberSortScriptLeafFactory implements NumberSortScript.LeafFactory /** Document lookup that returns doc values. */ private final SearchLookup lookup; + /** + * Maps parameter names to their actual indices in SOURCES. Generated in advance in the factory + * to avoid repeating the work for each document. + */ + private final Map parametersToIndex; + public CalciteNumberSortScriptLeafFactory( Function1 function, Map params, SearchLookup lookup) { this.function = function; this.params = params; this.lookup = lookup; + this.parametersToIndex = + IntStream.range(0, ((List) params.get(SOURCES)).size()) + .boxed() + .collect(Collectors.toMap(i -> "?" 
+ i, i -> i)); } @Override public NumberSortScript newInstance(LeafReaderContext context) throws IOException { - return new CalciteNumberSortScript(function, lookup, context, params); + return new CalciteNumberSortScript(function, lookup, context, params, parametersToIndex); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java index 8e73cc0da97..085c40de28c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScript.java @@ -5,17 +5,16 @@ package org.opensearch.sql.opensearch.storage.script.sort; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.MISSING_MAX; + import java.util.Map; import lombok.EqualsAndHashCode; import org.apache.calcite.DataContext; import org.apache.calcite.linq4j.function.Function1; -import org.apache.calcite.rel.RelFieldCollation.Direction; -import org.apache.calcite.rel.RelFieldCollation.NullDirection; import org.apache.lucene.index.LeafReaderContext; import org.opensearch.script.StringSortScript; import org.opensearch.search.lookup.SearchLookup; import org.opensearch.search.lookup.SourceLookup; -import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.script.core.CalciteScript; /** Calcite string sort script. */ @@ -26,8 +25,10 @@ public class CalciteStringSortScript extends StringSortScript { private final CalciteScript calciteScript; private final SourceLookup sourceLookup; - private final Direction direction; - private final NullDirection nullDirection; + + private final Map parametersToIndex; + + private final boolean missingMax; private static final String MAX_SENTINEL = "\uFFFF\uFFFF_NULL_PLACEHOLDER_"; private static final String MIN_SENTINEL = "\u0000\u0000_NULL_PLACEHOLDER_"; @@ -36,30 +37,23 @@ public CalciteStringSortScript( Function1 function, SearchLookup lookup, LeafReaderContext context, - Map params) { + Map params, + Map parametersToIndex) { super(params, lookup, context); this.calciteScript = new CalciteScript(function, params); // TODO: we'd better get source from the leafLookup of super once it's available this.sourceLookup = lookup.getLeafSearchLookup(context).source(); - this.direction = - params.containsKey(PlanUtils.DIRECTION) - ? Direction.valueOf((String) params.get(PlanUtils.DIRECTION)) - : Direction.ASCENDING; - this.nullDirection = - params.containsKey(PlanUtils.NULL_DIRECTION) - ? NullDirection.valueOf((String) params.get(PlanUtils.NULL_DIRECTION)) - : NullDirection.FIRST; + this.parametersToIndex = parametersToIndex; + this.missingMax = (Boolean) params.getOrDefault(MISSING_MAX, false); } @Override public String execute() { - Object value = calciteScript.execute(this.getDoc(), this.sourceLookup)[0]; + Object value = calciteScript.execute(this.getDoc(), this.sourceLookup, parametersToIndex)[0]; // There is a limitation here when the String value is larger or smaller than sentinel values. // It can't guarantee the lexigraphic ordering between null and special strings. if (value == null) { - boolean isAscending = direction == Direction.ASCENDING; - boolean isNullFirst = nullDirection == NullDirection.FIRST; - return isAscending == isNullFirst ? MIN_SENTINEL : MAX_SENTINEL; + return this.missingMax ? 
MAX_SENTINEL : MIN_SENTINEL; } return value.toString(); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java index a95ee30d59d..36a88fa6b0c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/CalciteStringSortScriptLeafFactory.java @@ -5,8 +5,13 @@ package org.opensearch.sql.opensearch.storage.script.sort; +import static org.opensearch.sql.opensearch.storage.serde.ScriptParameterHelper.SOURCES; + import java.io.IOException; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import lombok.EqualsAndHashCode; import org.apache.calcite.DataContext; import org.apache.calcite.linq4j.function.Function1; @@ -27,15 +32,25 @@ public class CalciteStringSortScriptLeafFactory implements StringSortScript.Leaf /** Search lookup. */ private final SearchLookup lookup; + /** + * Maps parameter names to their actual indices in SOURCES. Generated in advance in the factory + * to avoid repeating the work for each document. + */ + private final Map parametersToIndex; + public CalciteStringSortScriptLeafFactory( Function1 function, Map params, SearchLookup lookup) { this.function = function; this.params = params; this.lookup = lookup; + this.parametersToIndex = + IntStream.range(0, ((List) params.get(SOURCES)).size()) + .boxed() + .collect(Collectors.toMap(i -> "?" + i, i -> i)); } @Override public StringSortScript newInstance(LeafReaderContext context) throws IOException { - return new CalciteStringSortScript(function, lookup, context, params); + return new CalciteStringSortScript(function, lookup, context, params, parametersToIndex); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java index c7451adbf8b..d77dee3e297 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java @@ -79,6 +79,7 @@ import org.checkerframework.checker.nullness.qual.PolyNull; import org.opensearch.sql.calcite.type.AbstractExprRelDataType; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; /** * An extension to {@link RelJson} to allow serialization & deserialization of UDTs @@ -364,7 +365,11 @@ public RelDataType toType(RelDataTypeFactory typeFactory, Object o) { // Reconstruct UDT from its udt tag Object udtName = ((Map) o).get("udt"); OpenSearchTypeFactory.ExprUDT udt = OpenSearchTypeFactory.ExprUDT.valueOf((String) udtName); - return ((OpenSearchTypeFactory) typeFactory).createUDT(udt); + // View IP as a string to avoid using a value of a customized Java type in the script. 
+ if (udt == ExprUDT.EXPR_IP) return super.toType(typeFactory, o); + RelDataType type = ((OpenSearchTypeFactory) typeFactory).createUDT(udt); + boolean nullable = (Boolean) ((Map) o).get("nullable"); + return typeFactory.createTypeWithNullability(type, nullable); } return super.toType(typeFactory, o); } @@ -802,4 +807,10 @@ public boolean getBoolean(String tag, boolean default_) { throw new UnsupportedOperationException(); } } + + static RexNode translateInput( + RelJson relJson, int input, Map map, RelInput relInput) { + throw new UnsupportedOperationException( + "There shouldn't be any RexInputRef in the serialized RexNode."); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/OpenSearchRelInputTranslator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/OpenSearchRelInputTranslator.java index b5f401bb64a..da69c2ad518 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/OpenSearchRelInputTranslator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/OpenSearchRelInputTranslator.java @@ -36,7 +36,7 @@ public RexNode translateInput( if (input < rowType.getFieldCount()) { final RelDataTypeField field = rowType.getFieldList().get(input); - return rexBuilder.makeInputRef(field.getType(), input); + return rexBuilder.makeLocalRef(field.getType(), input); } throw new RuntimeException("input field " + input + " is out of range"); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java index fb751c8a72c..604012c4870 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializer.java @@ -14,13 +14,11 @@ import java.io.ObjectOutputStream; import java.io.Serializable; import java.util.Base64; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import lombok.Getter; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.externalize.RelJson; -import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.fun.SqlLibrary; @@ -28,12 +26,9 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.util.SqlOperatorTables; import org.apache.calcite.util.JsonBuilder; -import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.calcite.CalcitePlanContext; -import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.function.PPLBuiltinOperators; -import org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine; -import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; +import org.opensearch.sql.opensearch.executor.OpenSearchExecutionEngine.OperatorTable; /** * A serializer that (de-)serializes Calcite RexNode, RelDataType and OpenSearch field mapping. 
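The RelJsonSerializer hunks that follow replace the old three-entry envelope (EXPR, FIELD_TYPES, ROW_TYPE) with a single JSON string for the standardized RexNode; field and type information now rides in the script parameters instead. The wrapping itself is plain Java serialization plus Base64. A minimal round-trip sketch of that encoding, using a stand-in payload string:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Base64;

public class EnvelopeRoundTripSketch {
  public static void main(String[] args) throws Exception {
    String rexNodeJson = "{\"op\":\"PLUS\",\"operands\":[]}"; // stand-in for the RexNode JSON

    // serialize(): write the JSON string through an ObjectOutputStream, then Base64-encode it.
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    ObjectOutputStream objectOutput = new ObjectOutputStream(output);
    objectOutput.writeObject(rexNodeJson);
    objectOutput.flush();
    String encoded = Base64.getEncoder().encodeToString(output.toByteArray());

    // deserialize(): the inverse path, recovering the JSON before RelJson.toRex runs.
    ObjectInputStream objectInput =
        new ObjectInputStream(new ByteArrayInputStream(Base64.getDecoder().decode(encoded)));
    String decoded = (String) objectInput.readObject();
    System.out.println(rexNodeJson.equals(decoded)); // prints true
  }
}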
@@ -47,10 +42,6 @@ public class RelJsonSerializer { private final RelOptCluster cluster; - - public static final String EXPR = "expr"; - public static final String FIELD_TYPES = "fieldTypes"; - public static final String ROW_TYPE = "rowType"; private static final ObjectMapper mapper = new ObjectMapper(); private static final TypeReference> TYPE_REF = new TypeReference<>() {}; @@ -72,7 +63,7 @@ private static SqlOperatorTable getPplSqlOperatorTable() { SqlOperatorTables.chain( PPLBuiltinOperators.instance(), SqlStdOperatorTable.instance(), - OpenSearchExecutionEngine.OperatorTable.instance(), + OperatorTable.instance(), // Add a list of necessary SqlLibrary if needed SqlLibraryOperatorTableFactory.INSTANCE.getOperatorTable( SqlLibrary.MYSQL, @@ -89,45 +80,31 @@ private static SqlOperatorTable getPplSqlOperatorTable() { * Serializes Calcite expressions and field types into a map object string. * *

This method: - *

  • Convert RexNode and RelDataType objects to JSON strings. - *
  • Combines these JSON strings with OpenSearch field mappings into a map + *
  • Standardizes the original RexNode + *
  • Converts RexNode objects to JSON strings. *
  • Encodes the resulting map into a final object string * * @param rexNode pushed down RexNode - * @param rowType row type of RexNode input - * @param fieldTypes input field and ExprType mapping - * @return serialized string of map structure for inputs + * @return serialized string of RexNode expression. */ - public String serialize(RexNode rexNode, RelDataType rowType, Map fieldTypes) { - // Extract necessary fields and remap expression input indices for original RexNode - Pair remappedRexInfo = - OpenSearchRelOptUtil.getRemappedRexAndType(rexNode, rowType); - Map filteredFieldTypes = new HashMap<>(); - for (String fieldName : remappedRexInfo.getValue().getFieldNames()) { - filteredFieldTypes.put(fieldName, fieldTypes.get(fieldName)); - } + public String serialize(RexNode rexNode, ScriptParameterHelper parameterHelper) { + RexNode standardizedRexExpr = + RexStandardizer.standardizeRexNodeExpression(rexNode, parameterHelper); try { // Serialize RexNode and RelDataType by JSON JsonBuilder jsonBuilder = new JsonBuilder(); RelJson relJson = ExtendedRelJson.create(jsonBuilder); - String rexNodeJson = jsonBuilder.toJsonString(relJson.toJson(remappedRexInfo.getKey())); - Object rowTypeJsonObj = relJson.toJson(remappedRexInfo.getValue()); - String rowTypeJson = jsonBuilder.toJsonString(rowTypeJsonObj); - // Construct envelope of serializable objects - Map envelope = - Map.of(EXPR, rexNodeJson, FIELD_TYPES, filteredFieldTypes, ROW_TYPE, rowTypeJson); + String rexNodeJson = jsonBuilder.toJsonString(relJson.toJson(standardizedRexExpr)); + if (CalcitePlanContext.skipEncoding.get()) return rexNodeJson; // Write bytes of all serializable contents ByteArrayOutputStream output = new ByteArrayOutputStream(); ObjectOutputStream objectOutput = new ObjectOutputStream(output); - objectOutput.writeObject(envelope); + objectOutput.writeObject(rexNodeJson); objectOutput.flush(); - return CalcitePlanContext.skipEncoding.get() - ? 
rexNodeJson - : Base64.getEncoder().encodeToString(output.toByteArray()); + return Base64.getEncoder().encodeToString(output.toByteArray()); } catch (Exception e) { - throw new IllegalStateException( - "Failed to serialize RexNode: " + remappedRexInfo.getKey(), e); + throw new IllegalStateException("Failed to serialize RexNode: " + standardizedRexExpr, e); } } @@ -138,34 +115,23 @@ public String serialize(RexNode rexNode, RelDataType rowType, Map<String, ExprType> fieldTypes) { - public Map<String, Object> deserialize(String struct) { - Map objectMap = null; + public RexNode deserialize(String struct) { + String exprStr = null; try { - // Recover Map object from bytes ByteArrayInputStream input = new ByteArrayInputStream(Base64.getDecoder().decode(struct)); ObjectInputStream objectInput = new ObjectInputStream(input); - objectMap = (Map) objectInput.readObject(); + exprStr = (String) objectInput.readObject(); - // PPL Expr types are all serializable - Map fieldTypes = (Map) objectMap.get(FIELD_TYPES); // Deserialize RelDataType and RexNode by JSON RelJson relJson = ExtendedRelJson.create((JsonBuilder) null); - Map rowTypeMap = mapper.readValue((String) objectMap.get(ROW_TYPE), TYPE_REF); - RelDataType rowType = relJson.toType(cluster.getTypeFactory(), rowTypeMap); - OpenSearchRelInputTranslator inputTranslator = new OpenSearchRelInputTranslator(rowType); relJson = - relJson.withInputTranslator(inputTranslator).withOperatorTable(getPplSqlOperatorTable()); - Map exprMap = mapper.readValue((String) objectMap.get(EXPR), TYPE_REF); - RexNode rexNode = relJson.toRex(cluster, exprMap); - - return Map.of(EXPR, rexNode, FIELD_TYPES, fieldTypes, ROW_TYPE, rowType); + relJson + .withInputTranslator(ExtendedRelJson::translateInput) + .withOperatorTable(getPplSqlOperatorTable()); + Map<String, Object> exprMap = mapper.readValue(exprStr, TYPE_REF); + return relJson.toRex(cluster, exprMap); } catch (Exception e) { - if (objectMap == null) { - throw new IllegalStateException( - "Failed to deserialize RexNode due to object map is null", e); - } - throw new IllegalStateException( - "Failed to deserialize RexNode and its required structure: " + objectMap.get(EXPR), e); + throw new IllegalStateException("Failed to deserialize RexNode: " + exprStr, e); } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java new file mode 100644 index 00000000000..e015e5cddcd --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java @@ -0,0 +1,194 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.serde; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.ConstantExpression; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBiVisitorImpl; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexDynamicParam; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLambda; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexLocalRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexNodeAndFieldIndex; +import org.apache.calcite.rex.RexOver; +import
org.apache.calcite.rex.RexPatternFieldRef; +import org.apache.calcite.rex.RexRangeRef; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.rex.RexTableInputRef; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; +import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.Source; + +/** + * This helps standardize the RexNode expression. The process includes: 1. Replace RexInputRef with + * RexDynamicParam 2. Replace RexLiteral with RexDynamicParam TODO: 3. Replace RexCall with + * equivalent functions 4. Replace RelDataType with a wider type 5. Do constant folding + */ +public class RexStandardizer extends RexBiVisitorImpl<RexNode, ScriptParameterHelper> { + private static final RexStandardizer standardizer = new RexStandardizer(true); + + protected RexStandardizer(boolean deep) { + super(deep); + } + + @Override + public RexNode visitCall(final RexCall call, ScriptParameterHelper helper) { + boolean[] update = {false}; + List<RexNode> clonedOperands = visitList(call.operands, helper, update); + if (update[0]) { + // REVIEW jvs 8-Mar-2005: This doesn't take into account + // the fact that a rewrite may have changed the result type. + // To do that, we would need to take a RexBuilder and + // watch out for special operators like CAST and NEW where + // the type is embedded in the original call. + return call.clone(call.getType(), clonedOperands); + } else { + return call; + } + } + + @Override + public RexNode visitInputRef(RexInputRef inputRef, ScriptParameterHelper helper) { + int index = inputRef.getIndex(); + RelDataTypeField field = helper.inputFieldList.get(index); + ExprType exprType = helper.fieldTypes.get(field.getName()); + String docFieldName = + exprType == ExprCoreType.STRUCT || exprType == ExprCoreType.ARRAY + ? null + : OpenSearchTextType.toKeywordSubField(field.getName(), exprType); + int newIndex = helper.sources.size(); + if (docFieldName != null) { + helper.digests.add(docFieldName); + helper.sources.add(Source.DOC_VALUE.getValue()); + } else { + helper.digests.add(field.getName()); + helper.sources.add(Source.SOURCE.getValue()); + } + return new RexDynamicParam(field.getType(), newIndex); + } + + @Override + public RexNode visitLiteral(RexLiteral literal, ScriptParameterHelper helper) { + /* + * 1. Skip replacing SARG/DECIMAL as translating such literals is not supported; + * 2. Skip replacing SYMBOL/NULL as it may affect codegen; they shouldn't be parameters; + * 3. Skip INTERVAL_TYPES as it has a bug introduced by calcite-1.41.1, TODO: remove this when fixed; + */ + if (literal.getTypeName() == SqlTypeName.SARG + || literal.getType().getSqlTypeName() == SqlTypeName.DECIMAL + || literal.getTypeName() == SqlTypeName.SYMBOL + || literal.getTypeName() == SqlTypeName.NULL + || SqlTypeName.INTERVAL_TYPES.contains(literal.getTypeName())) { + return literal; + } + + Object literalValue = translateLiteral(literal); + if (literalValue == null) return literal; + int newIndex = helper.sources.size(); + helper.sources.add(Source.LITERAL.getValue()); + helper.digests.add(literalValue); + return new RexDynamicParam(literal.getType(), newIndex); + } + + /** Override all methods below to avoid returning null.
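+ * For instance, visitSubQuery and visitOver below hand back the node they were given; the
+ * default RexBiVisitorImpl implementations would return null here, which visitList would then
+ * copy into the rewritten operand list.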
*/ + @Override + public RexNode visitLocalRef(RexLocalRef localRef, ScriptParameterHelper arg) { + return localRef; + } + + @Override + public RexNode visitOver(RexOver over, ScriptParameterHelper arg) { + return over; + } + + @Override + public RexNode visitCorrelVariable(RexCorrelVariable correlVariable, ScriptParameterHelper arg) { + return correlVariable; + } + + @Override + public RexNode visitDynamicParam(RexDynamicParam dynamicParam, ScriptParameterHelper arg) { + return dynamicParam; + } + + @Override + public RexNode visitRangeRef(RexRangeRef rangeRef, ScriptParameterHelper arg) { + return rangeRef; + } + + @Override + public RexNode visitFieldAccess(RexFieldAccess fieldAccess, ScriptParameterHelper arg) { + return fieldAccess; + } + + @Override + public RexNode visitSubQuery(RexSubQuery subQuery, ScriptParameterHelper arg) { + return subQuery; + } + + @Override + public RexNode visitTableInputRef(RexTableInputRef ref, ScriptParameterHelper arg) { + return ref; + } + + @Override + public RexNode visitPatternFieldRef(RexPatternFieldRef fieldRef, ScriptParameterHelper arg) { + return fieldRef; + } + + @Override + public RexNode visitLambda(RexLambda lambda, ScriptParameterHelper arg) { + return lambda; + } + + @Override + public RexNode visitNodeAndFieldIndex( + RexNodeAndFieldIndex nodeAndFieldIndex, ScriptParameterHelper arg) { + return nodeAndFieldIndex; + } + + protected List<RexNode> visitList( + List<RexNode> exprs, ScriptParameterHelper helper, boolean[] update) { + ImmutableList.Builder<RexNode> clonedOperands = ImmutableList.builder(); + for (RexNode operand : exprs) { + RexNode clonedOperand = operand.accept(this, helper); + if ((clonedOperand != operand) && (update != null)) { + update[0] = true; + } + clonedOperands.add(clonedOperand); + } + return clonedOperands.build(); + } + + private static Object translateLiteral(RexLiteral literal) { + org.apache.calcite.linq4j.tree.Expression expression = + RexToLixTranslator.translateLiteral( + literal, + literal.getType(), + OpenSearchTypeFactory.TYPE_FACTORY, + org.apache.calcite.adapter.enumerable.RexImpTable.NullAs.NOT_POSSIBLE); + if (expression instanceof ConstantExpression constantExpression) { + return constantExpression.value; + } + return null; + } + + public static RexNode standardizeRexNodeExpression( + RexNode rexNode, ScriptParameterHelper helper) { + return rexNode.accept(standardizer, helper); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ScriptParameterHelper.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ScriptParameterHelper.java new file mode 100644 index 00000000000..c30ba1c245d --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ScriptParameterHelper.java @@ -0,0 +1,86 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.serde; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import lombok.Getter; +import org.apache.calcite.DataContext.Variable; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.runtime.Hook; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.UnsupportedScriptException; + +@Getter +public class ScriptParameterHelper { + public static final String MISSING_MAX = "MISSING_MAX"; + + public static final String SOURCES = "SOURCES"; + public
static final String DIGESTS = "DIGESTS"; + + /** Existing params derived from a push down action, e.g., SORT_EXPR push down */ + Map<String, Object> existingParams; + + List<RelDataTypeField> inputFieldList; + Map<String, ExprType> fieldTypes; + + /** + * Records the source of each parameter; it decides which kind of source the value is retrieved from. + * + *

    0 stands for DOC_VALUE + * + *

    1 stands for SOURCE + * + *

    2 stands for LITERAL + */ + List<Integer> sources; + + /** + * Records the digest of each parameter, which is used as the key to retrieve the value from the + * corresponding sources. It will be: + * + *

    - field name for `DOC_VALUE` and `SOURCE`, + * + *

    - literal value itself for `LITERAL` + */ + List<Object> digests; + + public ScriptParameterHelper( + List<RelDataTypeField> inputFieldList, Map<String, ExprType> fieldTypes) { + this(inputFieldList, fieldTypes, Collections.emptyMap()); + } + + public ScriptParameterHelper( + List<RelDataTypeField> inputFieldList, + Map<String, ExprType> fieldTypes, + Map<String, Object> params) { + this.existingParams = params; + this.inputFieldList = inputFieldList; + this.fieldTypes = fieldTypes; + this.sources = new ArrayList<>(); + this.digests = new ArrayList<>(); + } + + public Map<String, Object> getParameters() { + // The timestamp when the query is executed; it's used by some time-related functions. + long currentTime = Hook.CURRENT_TIME.get(-1L); + if (currentTime < 0) { + throw new UnsupportedScriptException( + "ScriptQueryExpression requires a valid current time from hook, but it is not set"); + } + return new LinkedHashMap<>() { // Use LinkedHashMap to make the plan stable + { + putAll(existingParams); + put(Variable.UTC_TIMESTAMP.camelName, currentTime); + put(SOURCES, sources); + put(DIGESTS, digests); + } + }; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java index 90738a267ff..83e8b8b74b1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java @@ -27,49 +27,16 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; -import org.apache.calcite.util.mapping.Mapping; -import org.apache.calcite.util.mapping.Mappings; import org.apache.commons.lang3.tuple.Pair; -import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.storage.scan.context.SortExprDigest; @UtilityClass public class OpenSearchRelOptUtil { - private static final RemapIndexBiVisitor remapIndexBiVisitor = new RemapIndexBiVisitor(true); - - /** - * For pushed down RexNode, the input schema doesn't need to be the same with scan output schema - * because the input values are read from ScriptDocValues or source by field name. It doesn't - * matter what the actual index is. Current serialization will serialize map of field name and - * field ExprType, which could be a long serialized string. Use this method to narrow down input - * rowType and rewrite RexNode's input references. After that, we can leverage the fewer columns - * in the rowType to serialize least required field types.
- * - * @param rexNode original RexNode to be pushed down - * @param inputRowType original input rowType of RexNode - * @return rewritten pair of RexNode and RelDataType - */ - public static Pair getRemappedRexAndType( - final RexNode rexNode, final RelDataType inputRowType) { - final BitSet seenOldIndex = new BitSet(); - final List newMappings = new ArrayList<>(); - rexNode.accept(remapIndexBiVisitor, Pair.of(seenOldIndex, newMappings)); - final List inputFieldList = inputRowType.getFieldList(); - final RelDataTypeFactory.Builder builder = OpenSearchTypeFactory.TYPE_FACTORY.builder(); - for (Integer oldIdx : newMappings) { - builder.add(inputFieldList.get(oldIdx)); - } - final Mapping mapping = Mappings.target(newMappings, inputRowType.getFieldCount()); - final RexNode newMappedRex = RexUtil.apply(mapping, rexNode); - return Pair.of(newMappedRex, builder.build()); - } - /** * Given an input Calcite RexNode, find the single input field with equivalent collation * information. The function returns the pair of input field index and a flag to indicate whether diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index f07ec4139dc..ec0e5d919b3 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -488,10 +488,10 @@ void analyze_aggCall_complexScriptFilter() throws ExpressionNotAnalyzableExcepti "filter_complex_count")) .expectDslTemplate( "[{\"filter_bool_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\"," - + "\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":0}},\"boost\":1.0}}," + + "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}}," + "\"aggregations\":{\"filter_bool_count\":{\"value_count\":{\"field\":\"_index\"}}}}}," + " {\"filter_complex_count\":{\"filter\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"*\\\"}\"," - + "\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":0}},\"boost\":1.0}}," + + "\"lang\":\"opensearch_compounded_script\",\"params\":{*}},\"boost\":1.0}}," + "\"aggregations\":{\"filter_complex_count\":{\"value_count\":{\"field\":\"_index\"}}}}}]") .expectResponseParser( new MetricParserHelper( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJsonTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJsonTest.java index e4703db86c1..024b2068200 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJsonTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJsonTest.java @@ -302,13 +302,13 @@ void testDeserializeMapTypes() { "nullable", false, "key", - Map.of("udt", "EXPR_IP", "type", "VARCHAR", "nullable", false, "precision", -1), + Map.of("udt", "EXPR_IP", "type", "OTHER", "nullable", false, "precision", -1), "value", Map.of("udt", "EXPR_TIMESTAMP", "type", "VARCHAR", "nullable", false, "precision", -1)); RelDataType expectedComplexMap = typeFactory.createMapType( - typeFactory.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_IP), + typeFactory.createSqlType(SqlTypeName.OTHER, false), typeFactory.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_TIMESTAMP), 
false); @@ -316,8 +316,7 @@ void testDeserializeMapTypes() { assertEquals(expectedComplexMap, deserializedType); assertEquals( - typeFactory.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_IP), - deserializedType.getKeyType()); + typeFactory.createSqlType(SqlTypeName.OTHER, false), deserializedType.getKeyType()); assertEquals( typeFactory.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_TIMESTAMP), deserializedType.getValueType()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializerTest.java index 1ba4d94c614..116ba5027fb 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/serde/RelJsonSerializerTest.java @@ -7,7 +7,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; +import java.util.List; import java.util.Map; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.volcano.VolcanoPlanner; @@ -28,12 +30,13 @@ import org.opensearch.sql.expression.function.PPLFuncImpTable; import org.opensearch.sql.opensearch.data.type.OpenSearchBinaryType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) public class RelJsonSerializerTest { - private final RexBuilder rexBuilder = new RexBuilder(OpenSearchTypeFactory.TYPE_FACTORY); + private final RexBuilder rexBuilder = new RexBuilder(TYPE_FACTORY); private final RelOptCluster cluster = RelOptCluster.create(new VolcanoPlanner(), rexBuilder); private final RelJsonSerializer serializer = new RelJsonSerializer(cluster); private final RelDataType rowType = @@ -52,13 +55,20 @@ void testSerializeAndDeserialize() { rexBuilder, BuiltinFunctionName.UPPER, rexBuilder.makeInputRef(rowType.getFieldList().get(0).getType(), 0)); + RexNode expectedNode = + PPLFuncImpTable.INSTANCE.resolve( + rexBuilder, + BuiltinFunctionName.UPPER, + rexBuilder.makeDynamicParam(rowType.getFieldList().get(0).getType(), 0)); - String code = serializer.serialize(rexUpper, rowType, fieldTypes); - Map objects = serializer.deserialize(code); + final ScriptParameterHelper helper = + new ScriptParameterHelper(rowType.getFieldList(), fieldTypes); + String code = serializer.serialize(rexUpper, helper); + RexNode rexNode = serializer.deserialize(code); - assertEquals(rexUpper, objects.get(RelJsonSerializer.EXPR)); - assertEquals(rowType, objects.get(RelJsonSerializer.ROW_TYPE)); - assertEquals(fieldTypes, objects.get(RelJsonSerializer.FIELD_TYPES)); + assertEquals(expectedNode, rexNode); + assertEquals(List.of(0), helper.sources); + assertEquals(List.of("Referer"), helper.digests); } @Test @@ -72,10 +82,7 @@ void testSerializeAndDeserializeUDT() { .add("time", UserDefinedFunctionUtils.NULLABLE_TIME_UDT) .add("timestamp", UserDefinedFunctionUtils.NULLABLE_TIMESTAMP_UDT) .add("ip", UserDefinedFunctionUtils.NULLABLE_IP_UDT) - .add( - "binary", - OpenSearchTypeFactory.TYPE_FACTORY.createUDT( - OpenSearchTypeFactory.ExprUDT.EXPR_BINARY)) + .add("binary", TYPE_FACTORY.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_BINARY)) .build(); Map 
fieldTypesWithUDT = Map.ofEntries( @@ -93,19 +100,32 @@ void testSerializeAndDeserializeUDT() { rexBuilder.makeInputRef(rowTypeWithUDT.getFieldList().get(2).getType(), 2), rexBuilder.makeInputRef(rowTypeWithUDT.getFieldList().get(3).getType(), 3), rexBuilder.makeInputRef(rowTypeWithUDT.getFieldList().get(4).getType(), 4)); - String serialized = serializer.serialize(rexNode, rowTypeWithUDT, fieldTypesWithUDT); - Map objects = serializer.deserialize(serialized); - assertEquals(rexNode, objects.get(RelJsonSerializer.EXPR)); - assertEquals(rowTypeWithUDT.toString(), objects.get(RelJsonSerializer.ROW_TYPE).toString()); - assertEquals(fieldTypesWithUDT, objects.get(RelJsonSerializer.FIELD_TYPES)); + RexNode expectedNode = + PPLFuncImpTable.INSTANCE.resolve( + rexBuilder, + BuiltinFunctionName.JSON_ARRAY, + rexBuilder.makeDynamicParam(rowTypeWithUDT.getFieldList().get(0).getType(), 0), + rexBuilder.makeDynamicParam(rowTypeWithUDT.getFieldList().get(1).getType(), 1), + rexBuilder.makeDynamicParam(rowTypeWithUDT.getFieldList().get(2).getType(), 2), + rexBuilder.makeDynamicParam(TYPE_FACTORY.createSqlType(SqlTypeName.OTHER, true), 3), + rexBuilder.makeDynamicParam(rowTypeWithUDT.getFieldList().get(4).getType(), 4)); + final ScriptParameterHelper helper = + new ScriptParameterHelper(rowTypeWithUDT.getFieldList(), fieldTypesWithUDT); + String serialized = serializer.serialize(rexNode, helper); + RexNode expr = serializer.deserialize(serialized); + assertEquals(expectedNode, expr); + assertEquals(List.of(0, 0, 0, 0, 0), helper.sources); + assertEquals(List.of("date", "time", "timestamp", "ip", "binary"), helper.digests); } @Test void testSerializeUnsupportedRexNode() { RexNode illegalRex = rexBuilder.makeRangeReference(rowType, 0, true); - assertThrows( - IllegalStateException.class, () -> serializer.serialize(illegalRex, rowType, fieldTypes)); + IllegalStateException.class, + () -> + serializer.serialize( + illegalRex, new ScriptParameterHelper(rowType.getFieldList(), fieldTypes))); } @Test @@ -121,8 +141,9 @@ void testDeserializeFunctionOutOfScope() { rexBuilder.makeInputRef(rowType.getFieldList().get(0).getType(), 0), rexBuilder.makeLiteral( 1, rexBuilder.getTypeFactory().createSqlType(SqlTypeName.INTEGER))); - - String code = serializer.serialize(outOfScopeRex, rowType, fieldTypes); + final ScriptParameterHelper helper = + new ScriptParameterHelper(rowType.getFieldList(), fieldTypes); + String code = serializer.serialize(outOfScopeRex, helper); assertThrows(IllegalStateException.class, () -> serializer.deserialize(code)); } @@ -148,12 +169,52 @@ void testSerializeIndexRemappedRexNode() { rexBuilder, BuiltinFunctionName.UPPER, rexBuilder.makeInputRef(rowType.getFieldList().get(0).getType(), 0)); + RexNode expectedNode = + PPLFuncImpTable.INSTANCE.resolve( + rexBuilder, + BuiltinFunctionName.UPPER, + rexBuilder.makeDynamicParam(rowType.getFieldList().get(0).getType(), 0)); + final ScriptParameterHelper helper = + new ScriptParameterHelper(originalRowType.getFieldList(), originalFieldTypes); + String code = serializer.serialize(originalRexUpper, helper); + RexNode rex = serializer.deserialize(code); + assertEquals(expectedNode, rex); + } - String code = serializer.serialize(originalRexUpper, originalRowType, originalFieldTypes); - Map objects = serializer.deserialize(code); - - assertEquals(remappedRexUpper, objects.get(RelJsonSerializer.EXPR)); - assertEquals(rowType, objects.get(RelJsonSerializer.ROW_TYPE)); - assertEquals(fieldTypes, objects.get(RelJsonSerializer.FIELD_TYPES)); + @Test + void 
testSerializeAndDeserializeLiteral() { + RelDataType rowTypeWithUDT = + rexBuilder + .getTypeFactory() + .builder() + .kind(StructKind.FULLY_QUALIFIED) + .add("date", UserDefinedFunctionUtils.NULLABLE_DATE_UDT) + .add("text", TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, true)) + .build(); + Map fieldTypesWithUDT = + Map.ofEntries( + Map.entry("date", OpenSearchDateType.of(ExprCoreType.DATE)), + Map.entry("text", OpenSearchDataType.of(MappingType.Text))); + RexNode rexNode = + PPLFuncImpTable.INSTANCE.resolve( + rexBuilder, + BuiltinFunctionName.JSON_ARRAY, + rexBuilder.makeInputRef(rowTypeWithUDT.getFieldList().get(0).getType(), 0), + rexBuilder.makeInputRef(rowTypeWithUDT.getFieldList().get(1).getType(), 1), + rexBuilder.makeLiteral(1, TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER, true))); + RexNode expectedNode = + PPLFuncImpTable.INSTANCE.resolve( + rexBuilder, + BuiltinFunctionName.JSON_ARRAY, + rexBuilder.makeDynamicParam(rowTypeWithUDT.getFieldList().get(0).getType(), 0), + rexBuilder.makeDynamicParam(rowTypeWithUDT.getFieldList().get(1).getType(), 1), + rexBuilder.makeDynamicParam(TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER, false), 2)); + final ScriptParameterHelper helper = + new ScriptParameterHelper(rowTypeWithUDT.getFieldList(), fieldTypesWithUDT); + String serialized = serializer.serialize(rexNode, helper); + RexNode expr = serializer.deserialize(serialized); + assertEquals(expectedNode, expr); + assertEquals(List.of(0, 1, 2), helper.sources); + assertEquals(List.of("date", "text", 1), helper.digests); } } From b47b7d6c6da1bf225f2df3faa1d21b48c97586c7 Mon Sep 17 00:00:00 2001 From: ritvibhatt <53196324+ritvibhatt@users.noreply.github.com> Date: Wed, 19 Nov 2025 01:05:52 -0800 Subject: [PATCH 66/99] Update PPL Command Documentation (#4562) * remove version info Signed-off-by: Ritvi Bhatt * fix defaults Signed-off-by: Ritvi Bhatt * update main docs and move aggregate functions Signed-off-by: Ritvi Bhatt * fix format Signed-off-by: Ritvi Bhatt * fix heading underlines Signed-off-by: Ritvi Bhatt * fix typos/content Signed-off-by: Ritvi Bhatt * fix formatting Signed-off-by: Ritvi Bhatt * update rex limitations Signed-off-by: Ritvi Bhatt * update underlines and bullet points Signed-off-by: Ritvi Bhatt * update function files Signed-off-by: Ritvi Bhatt * update index.rst with commands table Signed-off-by: Ritvi Bhatt * update formatting Signed-off-by: Ritvi Bhatt * update wording Signed-off-by: Ritvi Bhatt * move syntax Signed-off-by: Ritvi Bhatt * update note Signed-off-by: Ritvi Bhatt * fix append docs Signed-off-by: Ritvi Bhatt * fix subbullet formatting Signed-off-by: Ritvi Bhatt * fix subbullet formatting Signed-off-by: Ritvi Bhatt * fix bullet points Signed-off-by: Ritvi Bhatt * fix bin format Signed-off-by: Ritvi Bhatt * fix bullet points Signed-off-by: Ritvi Bhatt * update index.rst Signed-off-by: Ritvi Bhatt * fix stats Signed-off-by: Ritvi Bhatt * fix type in regexp_match Signed-off-by: Ritvi Bhatt --------- Signed-off-by: Ritvi Bhatt Signed-off-by: ritvibhatt <53196324+ritvibhatt@users.noreply.github.com> --- docs/user/ppl/cmd/ad.rst | 57 +- docs/user/ppl/cmd/append.rst | 21 +- docs/user/ppl/cmd/appendcol.rst | 42 +- docs/user/ppl/cmd/appendpipe.rst | 6 +- docs/user/ppl/cmd/bin.rst | 293 ++-------- docs/user/ppl/cmd/chart.rst | 54 +- docs/user/ppl/cmd/dedup.rst | 32 +- docs/user/ppl/cmd/describe.rst | 21 +- docs/user/ppl/cmd/eval.rst | 48 +- docs/user/ppl/cmd/eventstats.rst | 438 ++------------ docs/user/ppl/cmd/expand.rst | 48 +- docs/user/ppl/cmd/explain.rst | 39 
+- docs/user/ppl/cmd/fields.rst | 56 +- docs/user/ppl/cmd/fillnull.rst | 64 +-- docs/user/ppl/cmd/flatten.rst | 47 +- docs/user/ppl/cmd/grok.rst | 24 +- docs/user/ppl/cmd/head.rst | 25 +- docs/user/ppl/cmd/join.rst | 93 ++- docs/user/ppl/cmd/kmeans.rst | 12 +- docs/user/ppl/cmd/lookup.rst | 77 +-- docs/user/ppl/cmd/ml.rst | 84 ++- docs/user/ppl/cmd/multisearch.rst | 51 +- docs/user/ppl/cmd/parse.rst | 17 +- docs/user/ppl/cmd/patterns.rst | 92 +-- docs/user/ppl/cmd/rare.rst | 36 +- docs/user/ppl/cmd/regex.rst | 28 +- docs/user/ppl/cmd/rename.rst | 43 +- docs/user/ppl/cmd/replace.rst | 36 +- docs/user/ppl/cmd/reverse.rst | 31 +- docs/user/ppl/cmd/rex.rst | 85 ++- docs/user/ppl/cmd/search.rst | 37 +- docs/user/ppl/cmd/showdatasources.rst | 11 +- docs/user/ppl/cmd/sort.rst | 33 +- docs/user/ppl/cmd/spath.rst | 25 +- docs/user/ppl/cmd/stats.rst | 689 ++--------------------- docs/user/ppl/cmd/streamstats.rst | 45 +- docs/user/ppl/cmd/subquery.rst | 72 +-- docs/user/ppl/cmd/syntax.rst | 4 +- docs/user/ppl/cmd/table.rst | 41 +- docs/user/ppl/cmd/timechart.rst | 50 +- docs/user/ppl/cmd/top.rst | 47 +- docs/user/ppl/cmd/trendline.rst | 66 +-- docs/user/ppl/cmd/where.rst | 30 +- docs/user/ppl/functions/aggregations.rst | 522 +++++++++++++++++ docs/user/ppl/functions/condition.rst | 48 +- docs/user/ppl/functions/json.rst | 40 -- docs/user/ppl/functions/statistical.rst | 4 +- docs/user/ppl/index.rst | 154 ++--- 48 files changed, 1457 insertions(+), 2461 deletions(-) create mode 100644 docs/user/ppl/functions/aggregations.rst diff --git a/docs/user/ppl/cmd/ad.rst b/docs/user/ppl/cmd/ad.rst index 938e6e79918..26502dea682 100644 --- a/docs/user/ppl/cmd/ad.rst +++ b/docs/user/ppl/cmd/ad.rst @@ -10,41 +10,43 @@ ad (deprecated by ml command) Description -============ +=========== | The ``ad`` command applies Random Cut Forest (RCF) algorithm in the ml-commons plugin on the search result returned by a PPL command. Based on the input, the command uses two types of RCF algorithms: fixed in time RCF for processing time-series data, batch RCF for processing non-time-series data. -Fixed In Time RCF For Time-series Data Command Syntax -===================================================== -ad +Syntax +====== -* number_of_trees(integer): optional. Number of trees in the forest. The default value is 30. -* shingle_size(integer): optional. A shingle is a consecutive sequence of the most recent records. The default value is 8. -* sample_size(integer): optional. The sample size used by stream samplers in this forest. The default value is 256. -* output_after(integer): optional. The number of points required by stream samplers before results are returned. The default value is 32. -* time_decay(double): optional. The decay factor used by stream samplers in this forest. The default value is 0.0001. -* anomaly_rate(double): optional. The anomaly rate. The default value is 0.005. -* time_field(string): mandatory. It specifies the time field for RCF to use as time-series data. -* date_format(string): optional. It's used for formatting time_field field. The default formatting is "yyyy-MM-dd HH:mm:ss". -* time_zone(string): optional. It's used for setting time zone for time_field filed. The default time zone is UTC. -* category_field(string): optional. It specifies the category field used to group inputs. Each category will be independently predicted. 
+Fixed In Time RCF For Time-series Data +-------------------------------------- +ad [number_of_trees] [shingle_size] [sample_size] [output_after] [time_decay] [anomaly_rate] time_field [date_format] [time_zone] [category_field] +* number_of_trees: optional. Number of trees in the forest. **Default:** 30. +* shingle_size: optional. A shingle is a consecutive sequence of the most recent records. **Default:** 8. +* sample_size: optional. The sample size used by stream samplers in this forest. **Default:** 256. +* output_after: optional. The number of points required by stream samplers before results are returned. **Default:** 32. +* time_decay: optional. The decay factor used by stream samplers in this forest. **Default:** 0.0001. +* anomaly_rate: optional. The anomaly rate. **Default:** 0.005. +* time_field: mandatory. Specifies the time field for RCF to use as time-series data. +* date_format: optional. Used for formatting time_field. **Default:** "yyyy-MM-dd HH:mm:ss". +* time_zone: optional. Used for setting time zone for time_field. **Default:** "UTC". +* category_field: optional. Specifies the category field used to group inputs. Each category will be independently predicted. -Batch RCF for Non-time-series Data Command Syntax -================================================= -ad +Batch RCF For Non-time-series Data +---------------------------------- +ad [number_of_trees] [sample_size] [output_after] [training_data_size] [anomaly_score_threshold] [category_field] -* number_of_trees(integer): optional. Number of trees in the forest. The default value is 30. -* sample_size(integer): optional. Number of random samples given to each tree from the training data set. The default value is 256. -* output_after(integer): optional. The number of points required by stream samplers before results are returned. The default value is 32. -* training_data_size(integer): optional. The default value is the size of your training data set. -* anomaly_score_threshold(double): optional. The threshold of anomaly score. The default value is 1.0. -* category_field(string): optional. It specifies the category field used to group inputs. Each category will be independently predicted. +* number_of_trees: optional. Number of trees in the forest. **Default:** 30. +* sample_size: optional. Number of random samples given to each tree from the training data set. **Default:** 256. +* output_after: optional. The number of points required by stream samplers before results are returned. **Default:** 32. +* training_data_size: optional. **Default:** size of your training data set. +* anomaly_score_threshold: optional. The threshold of anomaly score. **Default:** 1.0. +* category_field: optional. Specifies the category field used to group inputs. Each category will be independently predicted. Example 1: Detecting events in New York City from taxi ridership data with time-series data =========================================================================================== -The example trains an RCF model and uses the model to detect anomalies in the time-series ridership data. +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data.
PPL query:: @@ -59,7 +61,7 @@ PPL query:: Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category ============================================================================================================================ -The example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. @@ -76,7 +78,7 @@ PPL query:: Example 3: Detecting events in New York City from taxi ridership data with non-time-series data =============================================================================================== -The example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. @@ -91,7 +93,7 @@ PPL query:: Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category ================================================================================================================================ -The example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. @@ -108,4 +110,3 @@ PPL query:: Limitations =========== The ``ad`` command can only work with ``plugins.calcite.enabled=false``. -It means ``ad`` command cannot work together with new PPL commands/functions introduced in 3.0.0 and above. diff --git a/docs/user/ppl/cmd/append.rst b/docs/user/ppl/cmd/append.rst index 25303aeb87b..6afdda6e439 100644 --- a/docs/user/ppl/cmd/append.rst +++ b/docs/user/ppl/cmd/append.rst @@ -1,6 +1,6 @@ -========= +====== append -========= +====== .. rubric:: Table of contents @@ -10,16 +10,12 @@ append Description -============ +=========== -| Using ``append`` command to append the result of a sub-search and attach it as additional rows to the bottom of the input search results (The main search). -The command aligns columns with the same field names and types. For different column fields between the main search and sub-search, NULL values are filled in the respective rows. - -Version -======= -3.3.0 +| The ``append`` command appends the result of a sub-search and attaches it as additional rows to the bottom of the input search results (The main search). +| The command aligns columns with the same field names and types. For different column fields between the main search and sub-search, NULL values are filled in the respective rows. Syntax -============ +====== append <sub-search> * sub-search: mandatory. Executes PPL commands as a secondary search. @@ -30,7 +26,7 @@ Limitations * **Schema Compatibility**: When fields with the same name exist between the main search and sub-search but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type, or use different field names (e.g., by renaming with ``eval`` or using ``fields`` to select non-conflicting columns).
Example 1: Append rows from a count aggregation to existing search result -=============================================================== +========================================================================= This example appends rows from "count by gender" to "sum by gender, state". @@ -50,7 +46,7 @@ PPL query:: +----------+--------+-------+------------+ Example 2: Append rows with merged column names -==================================================================================== +=============================================== This example appends rows from "sum by gender" to "sum by gender, state" with merged column of same field name and type. @@ -68,4 +64,3 @@ PPL query:: | 28 | F | null | | 101 | M | null | +-----+--------+-------+ - diff --git a/docs/user/ppl/cmd/appendcol.rst b/docs/user/ppl/cmd/appendcol.rst index b9eeeae83b8..a9cb714256b 100644 --- a/docs/user/ppl/cmd/appendcol.rst +++ b/docs/user/ppl/cmd/appendcol.rst @@ -11,47 +11,15 @@ appendcol Description ============ -| (Experimental) -| (From 3.1.0) -| Using ``appendcol`` command to append the result of a sub-search and attach it alongside with the input search results (The main search). - -Version -======= -3.1.0 +The ``appendcol`` command appends the result of a sub-search and attaches it alongside the input search results (The main search). Syntax -============ +====== appendcol [override=<boolean>] <sub-search> -* override=: optional. Boolean field to specify should result from main-result be overwritten in the case of column name conflict. +* override=<boolean>: optional. Boolean field to specify whether the result from the main search should be overwritten in the case of a column name conflict. **Default:** false. * sub-search: mandatory. Executes PPL commands as a secondary search. The sub-search uses the same data specified in the source clause of the main search results as its input. -Configuration -============= -This command requires Calcite enabled. - -Enable Calcite:: - - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "transient" : { - "plugins.calcite.enabled" : true - } - }' - -Result set:: - - { - "acknowledged": true, - "persistent": { - "plugins": { - "calcite": { - "enabled": "true" - } - } - }, - "transient": {} - } - Example 1: Append a count aggregation to existing search result =============================================================== @@ -103,6 +71,8 @@ PPL query:: Example 3: Append multiple sub-search results ============================================= +This example shows how to chain multiple appendcol commands to add columns from different sub-searches. + PPL query:: PPL> source=employees | fields name, dept, age | appendcol [ stats avg(age) as avg_age ] | appendcol [ stats max(age) as max_age ]; @@ -124,6 +94,8 @@ PPL query:: Example 4: Override case of column name conflict ================================================ +This example demonstrates the override option when column names conflict between main search and sub-search. + PPL query:: PPL> source=employees | stats avg(age) as agg by dept | appendcol override=true [ stats max(age) as agg by dept ]; diff --git a/docs/user/ppl/cmd/appendpipe.rst b/docs/user/ppl/cmd/appendpipe.rst index 43c4dd1e84d..c309517724a 100644 --- a/docs/user/ppl/cmd/appendpipe.rst +++ b/docs/user/ppl/cmd/appendpipe.rst @@ -11,13 +11,9 @@ appendpipe Description ============ -| Using ``appendpipe`` command to appends the result of the subpipeline to the search results.
Unlike a subsearch, the subpipeline is not run first.The subpipeline is run when the search reaches the appendpipe command. +| The ``appendpipe`` command appends the result of the subpipeline to the search results. Unlike a subsearch, the subpipeline is not run first. The subpipeline is run when the search reaches the appendpipe command. The command aligns columns with the same field names and types. For different column fields between the main search and sub-search, NULL values are filled in the respective rows. -Version -======= -3.3.0 - Syntax ============ appendpipe [<subpipeline>] diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst index 13fad6527fb..f42fb1da84f 100644 --- a/docs/user/ppl/cmd/bin.rst +++ b/docs/user/ppl/cmd/bin.rst @@ -1,6 +1,6 @@ -============= +=== bin -============= +=== .. rubric:: Table of contents @@ -9,234 +9,53 @@ bin :depth: 2 -.. note:: - - Available since version 3.3 - - Description ============ | The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. Syntax -============ +====== bin <field> [span=<span>] [minspan=<minspan>] [bins=<bins>] [aligntime=(earliest | latest | <time-specifier>)] [start=<start>] [end=<end>] * field: mandatory. The field to bin. Accepts numeric or time-based fields. * span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters. -* minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. -* bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. -* aligntime: optional. Align the bin times for time-based fields. Valid only for time-based discretization. Options: - - earliest: Align bins to the earliest timestamp in the data - - latest: Align bins to the latest timestamp in the data - - : Align bins to a specific epoch time value or time modifier expression -* start: optional. The starting value for binning range. If not specified, uses the minimum field value. -* end: optional. The ending value for binning range. If not specified, uses the maximum field value. - -Parameter Priority Order -======================== -When multiple parameters are specified, the bin command follows this priority order: - -1. **span** (highest priority) - Set the interval for binning -2. **minspan** (second priority) - Set the Minimum span for binning -3. **bins** (third priority) - Sets the maximum amount of bins -4. **start/end** (fourth priority) - Expand the range for binning -5. **default** (lowest priority) - Automatic magnitude-based binning - -**Note**: The **aligntime** parameter is a modifier that only applies to span-based binning (when using **span**) for time-based fields. It does not affect the priority order for bin type selection. - -Parameters -============ - -span Parameter --------------- -Specifies the width of each bin interval with support for multiple span types: - -**1. Numeric Span ** -- ``span=1000`` - Creates bins of width 1000 for numeric fields -- Calculation: ``floor(field / span) * span`` -- Dynamic binning: No artificial limits on number of bins, no "Other" category -**2.
Log-based Span (logarithmic binning)** -- **Syntax**: ``[]log[]`` or ``logN`` where N is the base -- **Examples**: - - ``span=log10`` - Base 10 logarithmic bins (coefficient=1) - - ``span=2log10`` - Base 10 with coefficient 2 - - ``span=log2`` - Base 2 logarithmic bins - - ``span=log3`` - Base 3 logarithmic bins (arbitrary base) - - ``span=1.5log3`` - Base 3 with coefficient 1.5 -- **Algorithm**: - - For each value: ``bin_number = floor(log_base(value/coefficient))`` - - Bin boundaries: ``[coefficient * base^n, coefficient * base^(n+1))`` - - Only creates bins where data exists (data-driven approach) -- **Rules**: - - Coefficient: Real number ≥ 1.0 and < base (optional, defaults to 1) - - Base: Real number > 1.0 (required) - - Creates logarithmic bin boundaries instead of linear - -**3. Time Scale Span (comprehensive time units)** -- **Subseconds**: ``us`` (microseconds), ``ms`` (milliseconds), ``cs`` (centiseconds), ``ds`` (deciseconds) -- **Seconds**: ``s``, ``sec``, ``secs``, ``second``, ``seconds`` -- **Minutes**: ``m``, ``min``, ``mins``, ``minute``, ``minutes`` -- **Hours**: ``h``, ``hr``, ``hrs``, ``hour``, ``hours`` -- **Days**: ``d``, ``day``, ``days`` - **Uses precise daily binning algorithm** -- **Months**: ``mon``, ``month``, ``months`` - **Uses precise monthly binning algorithm** -- **Examples**: - - ``span=30seconds`` - - ``span=15minutes`` - - ``span=2hours`` - - ``span=7days`` - - ``span=4months`` - - ``span=500ms`` - - ``span=100us`` - - ``span=50cs`` (centiseconds) - - ``span=2ds`` (deciseconds) - -**Daily Binning Algorithm (for day-based spans)** - -For daily spans (``1days``, ``7days``, ``30days``), the implementation uses a **precise daily binning algorithm** with Unix epoch reference: - -1. **Unix Epoch Reference**: Uses January 1, 1970 as the fixed reference point for all daily calculations -2. **Modular Arithmetic**: Calculates ``days_since_epoch % span_days`` to find position within span cycle -3. **Consistent Alignment**: Ensures identical input dates always produce identical bin start dates -4. **Date String Output**: Returns formatted date strings (``YYYY-MM-DD``) instead of timestamps - -**Algorithm Example**: For July 28, 2025 (day 20,297 since Unix epoch): -- ``span=6days``: 20,297 % 6 = 5 → bin starts July 23, 2025 (``"2025-07-23"``) -- ``span=7days``: 20,297 % 7 = 4 → bin starts July 24, 2025 (``"2025-07-24"``) - -**Monthly Binning Algorithm (for month-based spans)** - -For monthly spans (``1months``, ``4months``, ``6months``), the implementation uses a **precise monthly binning algorithm** with Unix epoch reference: - -1. **Unix Epoch Reference**: Uses January 1970 as the fixed reference point for all monthly calculations -2. **Modular Arithmetic**: Calculates ``months_since_epoch % span_months`` to find position within span cycle -3. **Consistent Alignment**: Ensures identical input dates always produce identical bin start months -4. **Month String Output**: Returns formatted month strings (``YYYY-MM``) instead of timestamps - -**Algorithm Example**: For July 2025 (666 months since Unix epoch): -- ``span=4months``: 666 % 4 = 2 → bin starts at month 664 = May 2025 (``"2025-05"``) -- ``span=6months``: 666 % 6 = 0 → bin starts at month 666 = July 2025 (``"2025-07"``) - -This ensures precise and consistent behavior for both daily and monthly binning operations. - -minspan Parameter ------------------ -Specifies the minimum allowed interval size using a magnitude-based algorithm. The algorithm works as follows: - -1. 
**Calculate default width**: ``10^FLOOR(LOG10(data_range))`` - the largest power of 10 that fits within the data range -2. **Apply minspan constraint**: - - If ``default_width >= minspan``: use the default width - - If ``default_width < minspan``: use ``10^CEIL(LOG10(minspan))`` - -This ensures bins use human-readable widths (powers of 10) while respecting the minimum span requirement. - -**Example**: For age data with range 20-40 (range=20) and minspan=11: -- Default width = 10^FLOOR(LOG10(20)) = 10^1 = 10 -- Since minspan=11 > 10, use 10^CEIL(LOG10(11)) = 10^2 = 100 -- Result: Single bin "0-100" covering all age values - -aligntime Parameter -------------------- -For time-based fields, aligntime allows you to specify how bins should be aligned. This parameter is essential for creating consistent time-based bins that align to meaningful boundaries like start of day, hour, etc. - -**Alignment Options:** - -* ``earliest``: Aligns bins to the earliest timestamp in the dataset -* ``latest``: Aligns bins to the latest timestamp in the dataset -* ````: Aligns bins to a specific epoch timestamp (e.g., 1640995200) -* ````: Aligns bins using time modifier expressions (standard-compatible) - -**Time Modifier Expressions:** - -Time modifiers provide a flexible way to align bins to specific time boundaries: - -* ``@d``: Align to start of day (00:00:00) -* ``@d+``: Align to start of day plus offset (e.g., ``@d+3h`` = 03:00:00) -* ``@d-``: Align to start of day minus offset (e.g., ``@d-1h`` = 23:00:00 previous day) - -**Supported Time Spans:** - -**Aligntime applies to:** -* ``us``, ``ms``, ``cs``, ``ds``: Subsecond units (microseconds, milliseconds, centiseconds, deciseconds) -* ``s``, ``sec``, ``secs``, ``seconds``: Seconds -* ``m``, ``min``, ``mins``, ``minutes``: Minutes -* ``h``, ``hr``, ``hrs``, ``hours``: Hours - -**Aligntime ignored for:** -* ``d``, ``days``: Days - automatically aligns to midnight using daily binning algorithm -* ``M``, ``months``: Months - automatically aligns to month start using monthly binning algorithm -**How Aligntime Works:** + * Supports numeric (e.g., ``1000``), logarithmic (e.g., ``log10``, ``2log10``), and time intervals + * Available time units: -The aligntime parameter modifies the binning calculation: -* **Without aligntime**: ``floor(timestamp / span) * span`` -* **With aligntime**: ``floor((timestamp - aligntime) / span) * span + aligntime`` -* **With day/month spans**: Aligntime is ignored, natural boundaries used via specialized algorithms + * microsecond (us) + * millisecond (ms) + * centisecond (cs) + * decisecond (ds) + * second (s, sec, secs, second, seconds) + * minute (m, min, mins, minute, minutes) + * hour (h, hr, hrs, hour, hours) + * day (d, day, days) + * month (mon, month, months) -This ensures that bins are aligned to meaningful time boundaries rather than arbitrary epoch-based intervals. - -bins Parameter --------------- -Automatically calculates the span using a mathematical O(1) algorithm to create human-readable bin widths based on powers of 10. - -**Validation**: The bins parameter must be between 2 and 50000 (inclusive). Values outside this range will result in an error. - -The algorithm uses **mathematical optimization** instead of iteration for O(1) performance: - -1. **Validate bins**: Ensure ``2 ≤ bins ≤ 50000`` -2. **Calculate data range**: ``data_range = max_value - min_value`` -3. **Calculate target width**: ``target_width = data_range / requested_bins`` -4. 
**Find optimal starting point**: ``exponent = CEIL(LOG10(target_width))`` -5. **Select optimal width**: ``optimal_width = 10^exponent`` -6. **Account for boundaries**: If ``max_value % optimal_width == 0``, add one extra bin -7. **Adjust if needed**: If ``actual_bins > requested_bins``, use ``10^(exponent + 1)`` - -**Mathematical Formula**: -- ``optimal_width = 10^CEIL(LOG10(data_range / requested_bins))`` -- **Boundary condition**: ``actual_bins = CEIL(data_range / optimal_width) + (max_value % optimal_width == 0 ? 1 : 0)`` - -**Example**: For age data with range 20-50 (range=30) and bins=3: -- ``target_width = 30 / 3 = 10`` -- ``exponent = CEIL(LOG10(10)) = CEIL(1.0) = 1`` -- ``optimal_width = 10^1 = 10`` -- ``actual_bins = CEIL(30/10) = 3`` ≤ 3 -- Result: Use width=10, creating bins "20-30", "30-40", "40-50" - -start and end Parameters -------------------------- -Define the range for binning using an effective range expansion algorithm. The key insight is that start/end parameters affect the **width calculation**, not just the binning boundaries. - -**Algorithm:** -1. **Calculate effective range**: Only expand, never shrink the data range - - ``effective_min = MIN(start, data_min)`` if start specified - - ``effective_max = MAX(end, data_max)`` if end specified - - ``effective_range = effective_max - effective_min`` - -2. **Apply magnitude-based width calculation** with boundary handling: - - If ``effective_range`` is exactly a power of 10: ``width = 10^(FLOOR(LOG10(effective_range)) - 1)`` - - Otherwise: ``width = 10^FLOOR(LOG10(effective_range))`` - -3. **Create bins** using the calculated width +* minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. +* bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. The bins parameter must be between 2 and 50000 (inclusive). +* aligntime: optional. Align the bin times for time-based fields. Valid only for time-based discretization. Options: -**Examples**: + * earliest: Align bins to the earliest timestamp in the data + * latest: Align bins to the latest timestamp in the data + * : Align bins to a specific epoch time value or time modifier expression -- **end=100000**: effective_range = 100,000 (exact power of 10) - - Width = 10^(5-1) = 10^4 = 10,000 - - Result: 5 bins "0-10000", "10000-20000", ..., "40000-50000" +* start: optional. The starting value for binning range. **Default:** minimum field value. +* end: optional. The ending value for binning range. **Default:** maximum field value. -- **end=100001**: effective_range = 100,001 (not exact power of 10) - - Width = 10^FLOOR(LOG10(100,001)) = 10^5 = 100,000 - - Result: Single bin "0-100000" with count 1000 +**Parameter Behavior** -Examples -======== +When multiple parameters are specified, priority order is: span > minspan > bins > start/end > default. -Span Parameter Examples -======================= +**Special Behaviors:** +* Logarithmic span (``log10``, ``2log10``, etc.) 
creates logarithmic bin boundaries instead of linear +* Daily/monthly spans automatically align to calendar boundaries and return date strings (YYYY-MM-DD) instead of timestamps +* aligntime parameter only applies to time spans excluding days/months +* start/end parameters expand the range (never shrink) and affect bin width calculation Example 1: Basic numeric span -============================== +============================= PPL query:: @@ -266,7 +85,7 @@ PPL query:: Example 3: Logarithmic span (log10) -==================================== +=================================== PPL query:: @@ -280,7 +99,7 @@ PPL query:: +------------------+ Example 4: Logarithmic span with coefficient -============================================= +============================================ PPL query:: @@ -294,11 +113,8 @@ PPL query:: | 20000.0-200000.0 | +------------------+ -Bins Parameter Examples -======================= - Example 5: Basic bins parameter -================================ +=============================== PPL query:: @@ -313,7 +129,7 @@ PPL query:: +------------+ Example 6: Low bin count -========================= +======================== PPL query:: @@ -326,7 +142,7 @@ PPL query:: +-------+ Example 7: High bin count -========================== +========================= PPL query:: @@ -340,11 +156,8 @@ PPL query:: | 28-29 | 13 | +-------+----------------+ -Minspan Parameter Examples -========================== - Example 8: Basic minspan -========================= +======================== PPL query:: @@ -359,7 +172,7 @@ PPL query:: +-------+----------------+ Example 9: Large minspan -========================== +======================== PPL query:: @@ -371,11 +184,8 @@ PPL query:: | 0-1000 | +--------+ -Start/End Parameter Examples -============================ - Example 10: Start and end range -================================ +=============================== PPL query:: @@ -388,7 +198,7 @@ PPL query:: +-------+ Example 11: Large end range -============================ +=========================== PPL query:: @@ -401,7 +211,7 @@ PPL query:: +----------+ Example 12: Span with start/end -================================ +=============================== PPL query:: @@ -416,11 +226,8 @@ PPL query:: | 33-34 | +-------+ -Time-based Examples -=================== - Example 13: Hour span -====================== +===================== PPL query:: @@ -435,7 +242,7 @@ PPL query:: +---------------------+-------+ Example 14: Minute span -======================== +======================= PPL query:: @@ -450,7 +257,7 @@ PPL query:: +---------------------+-------+ Example 15: Second span -======================== +======================= PPL query:: @@ -465,7 +272,7 @@ PPL query:: +---------------------+-------+ Example 16: Daily span -======================= +====================== PPL query:: @@ -479,11 +286,8 @@ PPL query:: | 2025-07-24 00:00:00 | 9187 | +---------------------+-------+ -Aligntime Parameter Examples -============================ - Example 17: Aligntime with time modifier -========================================= +======================================== PPL query:: @@ -498,7 +302,7 @@ PPL query:: +---------------------+-------+ Example 18: Aligntime with epoch timestamp -=========================================== +========================================== PPL query:: @@ -512,11 +316,8 @@ PPL query:: | 2025-07-28 00:40:00 | 9187 | +---------------------+-------+ -Default Binning Example -======================= - Example 19: Default behavior (no parameters) 
-============================================== +============================================ PPL query:: diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst index fe8aac4a3e0..4ffe3e7abef 100644 --- a/docs/user/ppl/cmd/chart.rst +++ b/docs/user/ppl/cmd/chart.rst @@ -14,68 +14,51 @@ Description The ``chart`` command transforms search results by applying a statistical aggregation function and optionally grouping the data by one or two fields. The results are suitable for visualization as a two-dimension chart when grouping by two fields, where unique values in the second group key can be pivoted to column names. -Version -======= -3.4.0 - Syntax ====== +chart [limit=(top|bottom) <int>] [useother=<bool>] [usenull=<bool>] [nullstr=<string>] [otherstr=<string>] <aggregation_function> [ by <row-split> <column-split> ] | [over <row-split>] [ by <column-split> ] -.. code-block:: text - - chart - [limit=(top|bottom) <int>] [useother=<bool>] [usenull=<bool>] [nullstr=<string>] [otherstr=<string>] - <aggregation_function> - [ by <row-split> <column-split> ] | [over <row-split>] [ by <column-split> ] -**Parameters:** +* limit: optional. Specifies the number of categories to display when using column split. Each unique value in the column split field represents a category. **Default:** top10. -* **limit**: optional. Specifies the number of categories to display when using column split. Each unique value in the column split field represents a category. - - * Default: top10 * Syntax: ``limit=(top|bottom)<int>`` or ``limit=<int>`` (defaults to top) * When ``limit=K`` is set, the top or bottom K categories from the column split field are retained; the remaining categories are grouped into an "OTHER" category if ``useother`` is not set to false. * Set limit to 0 to show all categories without any limit. * Use ``limit=topK`` or ``limit=bottomK`` to specify whether to retain the top or bottom K column categories. The ranking is based on the sum of aggregated values for each column category. For example, ``chart limit=top3 count() by region, product`` keeps the 3 products with the highest total counts across all regions. If not specified, top is used by default. * Only applies when column split is present (by 2 fields or over...by... coexists). -* **useother**: optional. Controls whether to create an "OTHER" category for categories beyond the limit. +* useother: optional. Controls whether to create an "OTHER" category for categories beyond the limit. **Default:** true - * Default: true * When set to false, only the top/bottom N categories (based on limit) are shown without an "OTHER" category. * When set to true, categories beyond the limit are grouped into an "OTHER" category. * Only applies when using column split and when there are more categories than the limit. -* **usenull**: optional. Controls whether to group events without a column split (i.e. whose column split is null) into a separate "NULL" category. +* usenull: optional. Controls whether to group events without a column split (i.e. whose column split is null) into a separate "NULL" category. **Default:** true - * Default: true * ``usenull`` only applies to column split. * Row split should always be a non-null value. Documents with null values in row split will be ignored. * When ``usenull=false``, events with a null column split are excluded from results. * When ``usenull=true``, events with a null column split are grouped into a separate "NULL" category. -* **nullstr**: optional. Specifies the category name for rows that do not contain the column split value. +* nullstr: optional. Specifies the category name for rows that do not contain the column split value. **Default:** "NULL" - * Default: "NULL" * Only applies when ``usenull`` is set to true.
-* **otherstr**: optional. Specifies the category name for the "OTHER" category. +* otherstr: optional. Specifies the category name for the "OTHER" category. **Default:** "OTHER" - * Default: "OTHER" * Only applies when ``useother`` is set to true and there are values beyond the limit. -* **aggregation_function**: mandatory. The aggregation function to apply to the data. +* aggregation_function: mandatory. The aggregation function to apply to the data. * Currently, only a single aggregation function is supported. - * Available functions: aggregation functions supported by the `stats <stats.rst>`_ command. + * Available functions: aggregation functions supported by the stats command. -* **by**: optional. Groups the results by either one field (row split) or two fields (row split and column split) +* by: optional. Groups the results by either one field (row split) or two fields (row split and column split) * ``limit``, ``useother``, and ``usenull`` apply to the column split * Results are returned as individual rows for each combination. * If not specified, the aggregation is performed across all documents. -* **over...by...**: optional. Alternative syntax for grouping by multiple fields. +* over...by...: optional. Alternative syntax for grouping by multiple fields. * ``over <row-split> by <column-split>`` groups the results by both fields. * Using ``over`` alone on one field is equivalent to ``by <row-split>`` @@ -87,11 +70,8 @@ Notes * Documents with null values in fields used by the aggregation function are excluded from aggregation. For example, in ``chart avg(balance) over deptno, group``, documents where ``balance`` is null are excluded from the average calculation. * The aggregation metric appears as the last column in the result. Result columns are ordered as: [row-split] [column-split] [aggregation-metrics]. -Examples -======== - Example 1: Basic aggregation without grouping ---------------------------------------------- +============================================= This example calculates the average balance across all accounts. @@ -106,7 +86,7 @@ PPL query:: +--------------+ Example 2: Group by single field --------------------------------- +================================ This example calculates the count of accounts grouped by gender. @@ -122,7 +102,7 @@ PPL query:: +--------+---------+ Example 3: Using over and by for multiple field grouping --------------------------------------------------------- +======================================================== This example shows average balance grouped by both gender and age fields. Note that the age column in the result is converted to string type. @@ -140,7 +120,7 @@ PPL query:: +--------+-----+--------------+ Example 4: Using basic limit functionality ------------------------------------------- +========================================== This example limits the results to show only the top 1 age group. Note that the age column in the result is converted to string type. @@ -157,7 +137,7 @@ PPL query:: +--------+-------+---------+ Example 5: Using limit with other parameters --------------------------------------------- +============================================ This example shows using limit with useother and custom otherstr parameters. @@ -175,7 +155,7 @@ PPL query:: +-------+--------------+---------+ Example 6: Using null parameters --------------------------------- +================================ This example shows using limit with usenull and custom nullstr parameters.
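To make the ``limit``/``useother`` behavior described in the parameters above concrete, here is a minimal, illustrative Java sketch of retaining the top K column-split categories (ranked by the sum of their aggregated values) and folding the remainder into an "OTHER" bucket. This is not the plugin's actual implementation; the class and method names are hypothetical.

.. code-block:: java

    import java.util.*;
    import java.util.stream.*;

    /** Illustrative only: fold column-split categories beyond the top K into "OTHER". */
    public final class ChartLimitSketch {
      /** totals maps each column-split category to the sum of its aggregated values. */
      public static Map<String, Long> applyLimit(Map<String, Long> totals, int k,
                                                 boolean useOther, String otherStr) {
        if (k <= 0) return totals; // limit=0 means "show all categories"
        // Rank descending for limit=topK; limit=bottomK would sort ascending instead.
        List<Map.Entry<String, Long>> ranked = totals.entrySet().stream()
            .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
            .collect(Collectors.toList());
        Map<String, Long> result = new LinkedHashMap<>();
        long other = 0;
        for (int i = 0; i < ranked.size(); i++) {
          if (i < k) {
            result.put(ranked.get(i).getKey(), ranked.get(i).getValue());
          } else {
            other += ranked.get(i).getValue(); // categories beyond the limit
          }
        }
        if (useOther && other > 0) result.put(otherStr, other); // grouped into "OTHER"
        return result;
      }
    }

With ``useOther=false`` the tail categories are simply dropped, which mirrors the documented behavior of showing only the top/bottom N categories without an "OTHER" column.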
@@ -193,7 +173,7 @@ PPL query:: +-----------+------------------------+---------+ Example 7: Using chart command with span ----------------------------------------- +======================================== This example demonstrates using span for grouping age ranges. diff --git a/docs/user/ppl/cmd/dedup.rst b/docs/user/ppl/cmd/dedup.rst index 264d3b3c9b8..bc3e9a48ca5 100644 --- a/docs/user/ppl/cmd/dedup.rst +++ b/docs/user/ppl/cmd/dedup.rst @@ -1,6 +1,6 @@ -============= +===== dedup -============= +===== .. rubric:: Table of contents @@ -10,25 +10,22 @@ dedup Description -============ -| Using ``dedup`` command to remove identical document defined by field from the search result. - +=========== +The ``dedup`` command removes duplicate documents defined by specified fields from the search result. Syntax -============ +====== dedup [int] <field-list> [keepempty=<bool>] [consecutive=<bool>] - -* int: optional. The ``dedup`` command retains multiple events for each combination when you specify <int>. The number for <int> must be greater than 0. If you do not specify a number, only the first occurring event is kept. All other duplicates are removed from the results. **Default:** 1 -* keepempty: optional. if true, keep the document if the any field in the field-list has NULL value or field is MISSING. **Default:** false. +* int: optional. The ``dedup`` command retains multiple events for each combination when you specify <int>. The number for <int> must be greater than 0. All other duplicates are removed from the results. **Default:** 1 +* keepempty: optional. If set to true, keep the document if any field in the field-list has a NULL value or the field is MISSING. **Default:** false. * consecutive: optional. If set to true, removes only events with duplicate combinations of values that are consecutive. **Default:** false. * field-list: mandatory. The comma-delimited field list. At least one field is required. - Example 1: Dedup by one field ============================= -The example show dedup the document with gender field. +This example shows deduplicating documents by gender field. PPL query:: @@ -44,7 +41,7 @@ PPL query:: Example 2: Keep 2 duplicates documents ====================================== -The example show dedup the document with gender field keep 2 duplication. +This example shows deduplicating documents by gender field while keeping 2 duplicates. PPL query:: @@ -59,9 +56,9 @@ PPL query:: +----------------+--------+ Example 3: Keep or Ignore the empty field by default -============================================ +==================================================== -The example show dedup the document by keep null value field. +This example shows deduplicating documents while keeping null values. PPL query:: @@ -77,7 +74,7 @@ PPL query:: +----------------+-----------------------+ -The example show dedup the document by ignore the empty value field. +This example shows deduplicating documents while ignoring null values. PPL query:: @@ -93,9 +90,9 @@ PPL query:: Example 4: Dedup in consecutive document -========================================= +======================================== -The example show dedup the consecutive document. +This example shows deduplicating consecutive documents. PPL query:: @@ -112,4 +109,3 @@ PPL query:: Limitations =========== The ``dedup`` with ``consecutive=true`` command can only work with ``plugins.calcite.enabled=false``. -It means ``dedup`` with ``consecutive=true`` command cannot work together with new PPL commands/functions introduced in 3.0.0 and above.
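As a reference for the dedup semantics above, the following is a minimal Java sketch (not the plugin's actual implementation; names are hypothetical) of keeping the first <int> occurrences per key combination, plus the ``consecutive=true`` variant that drops only adjacent duplicates. The ``keepempty`` handling of NULL/MISSING fields is omitted for brevity.

.. code-block:: java

    import java.util.*;

    /** Illustrative only: keep at most `keep` rows per key, or collapse consecutive duplicates. */
    public final class DedupSketch {
      public static List<Map<String, Object>> dedup(List<Map<String, Object>> rows,
                                                    List<String> fields, int keep,
                                                    boolean consecutive) {
        List<Map<String, Object>> out = new ArrayList<>();
        Map<List<Object>, Integer> seen = new HashMap<>();
        List<Object> prev = null;
        for (Map<String, Object> row : rows) {
          List<Object> key = new ArrayList<>();
          for (String f : fields) key.add(row.get(f)); // combination of field values
          if (consecutive) {
            if (!key.equals(prev)) out.add(row); // drop only adjacent duplicates
            prev = key;
          } else {
            int n = seen.merge(key, 1, Integer::sum);
            if (n <= keep) out.add(row); // retain the first <int> occurrences
          }
        }
        return out;
      }
    }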
diff --git a/docs/user/ppl/cmd/describe.rst b/docs/user/ppl/cmd/describe.rst index c732480e328..2fbb4003414 100644 --- a/docs/user/ppl/cmd/describe.rst +++ b/docs/user/ppl/cmd/describe.rst @@ -1,6 +1,6 @@ -============= +======== describe -============= +======== .. rubric:: Table of contents @@ -10,24 +10,21 @@ describe Description -============ -| Using ``describe`` command to query metadata of the index. ``describe`` command could be only used as the first command in the PPL query. - +=========== +Use the ``describe`` command to query metadata of the index. ``describe`` command can only be used as the first command in the PPL query. Syntax -============ -describe <dataSource>.<schema>.<tablename> +====== +describe [dataSource.][schema.]<tablename> * dataSource: optional. If dataSource is not provided, it resolves to opensearch dataSource. -* schema: optional. If schema is not provided, it resolves to default schema. +* schema: optional. If schema is not provided, it resolves to default schema. * tablename: mandatory. describe command must specify which tablename to query from. - - Example 1: Fetch all the metadata ================================= -The example describes accounts index. +This example describes the accounts index. PPL query:: @@ -52,7 +49,7 @@ PPL query:: Example 2: Fetch metadata with condition and filter =================================================== -The example retrieves columns with type long in accounts index. +This example retrieves columns with type bigint in the accounts index. PPL query:: diff --git a/docs/user/ppl/cmd/eval.rst b/docs/user/ppl/cmd/eval.rst index 187f4e3f7cc..ada7b179526 100644 --- a/docs/user/ppl/cmd/eval.rst +++ b/docs/user/ppl/cmd/eval.rst @@ -1,6 +1,6 @@ -============= +==== eval -============= +==== .. rubric:: Table of contents @@ -10,21 +10,20 @@ eval Description -============ -| The ``eval`` command evaluate the expression and append the result to the search result. - +=========== +The ``eval`` command evaluates the expression and appends the result to the search result. Syntax -============ +====== eval <field> = <expression> ["," <field> = <expression>]... -* field: mandatory. If the field name not exist, a new field is added. If the field name already exists, it will be overrided. -* expression: mandatory. Any expression support by the system. +* field: mandatory. If the field name does not exist, a new field is added. If the field name already exists, it will be overridden. +* expression: mandatory. Any expression supported by the system. -Example 1: Create the new field -=============================== +Example 1: Create a new field +============================= -The example show to create new field doubleAge for each document. The new doubleAge is the evaluation result of age multiply by 2. +This example shows creating a new field doubleAge for each document. The new doubleAge field is the result of multiplying age by 2. PPL query:: @@ -40,10 +39,10 @@ PPL query:: +-----+-----------+ -Example 2: Override the existing field -====================================== +Example 2: Override an existing field +===================================== -The example show to override the exist age field with age plus 1. +This example shows overriding the existing age field by adding 1 to it.
PPL query:: @@ -58,10 +57,10 @@ PPL query:: | 34 | +-----+ -Example 3: Create the new field with field defined in eval -========================================================== +Example 3: Create a new field with field defined in eval +======================================================== -The example show to create a new field ddAge with field defined in eval command. The new field ddAge is the evaluation result of doubleAge multiply by 2, the doubleAge is defined in the eval command. +This example shows creating a new field ddAge using a field defined in the same eval command. The new field ddAge is the result of multiplying doubleAge by 2, where doubleAge is defined in the same eval command. PPL query:: @@ -76,12 +75,12 @@ PPL query:: | 33 | 66 | 132 | +-----+-----------+-------+ -Example 4: String concatenation with + operator(need to enable calcite) -=============================================== +Example 4: String concatenation +=============================== -The example shows how to use the + operator for string concatenation in eval command. You can concatenate string literals and field values. +This example shows using the + operator for string concatenation. You can concatenate string literals and field values. -PPL query example 1 - Concatenating a literal with a field:: +PPL query:: source=accounts | eval greeting = 'Hello ' + firstname | fields firstname, greeting @@ -96,7 +95,12 @@ Expected result:: | Dale | Hello Dale | +---------------+---------------------+ -PPL query example 2 - Multiple concatenations with type casting:: +Example 5: Multiple string concatenation with type casting +========================================================== + +This example shows multiple concatenations with type casting from numeric to string. + +PPL query:: source=accounts | eval full_info = 'Name: ' + firstname + ', Age: ' + CAST(age AS STRING) | fields firstname, age, full_info diff --git a/docs/user/ppl/cmd/eventstats.rst b/docs/user/ppl/cmd/eventstats.rst index 958b28e606b..755af0486e4 100644 --- a/docs/user/ppl/cmd/eventstats.rst +++ b/docs/user/ppl/cmd/eventstats.rst @@ -1,6 +1,6 @@ -============= +========== eventstats -============= +========== .. rubric:: Table of contents @@ -10,10 +10,8 @@ eventstats Description -============ -| (Experimental) -| (From 3.1.0) -| Using ``eventstats`` command to enriches your event data with calculated summary statistics. It operates by analyzing specified fields within your events, computing various statistical measures, and then appending these results as new fields to each original event. +=========== +| The ``eventstats`` command enriches your event data with calculated summary statistics. It operates by analyzing specified fields within your events, computing various statistical measures, and then appending these results as new fields to each original event. | Key aspects of `eventstats`: @@ -24,403 +22,61 @@ Description | Difference between ``stats`` and ``eventstats`` The ``stats`` and ``eventstats`` commands are both used for calculating statistics, but they have some key differences in how they operate and what they produce: -* Output Format: - * ``stats``: Produces a summary table with only the calculated statistics. - * ``eventstats``: Adds the calculated statistics as new fields to the existing events, preserving the original data. -* Event Retention: - * ``stats``: Reduces the result set to only the statistical summary, discarding individual events. 
- * ``eventstats``: Retains all original events and adds new fields with the calculated statistics. -* Use Cases: - * ``stats``: Best for creating summary reports or dashboards. Often used as a final command to summarize results. - * ``eventstats``: Useful when you need to enrich events with statistical context for further analysis or filtering. Can be used mid-search to add statistics that can be used in subsequent commands. +* Output Format + * ``stats``: Produces a summary table with only the calculated statistics. + * ``eventstats``: Adds the calculated statistics as new fields to the existing events, preserving the original data. -Version -======= -3.1.0 +* Event Retention + + * ``stats``: Reduces the result set to only the statistical summary, discarding individual events. + * ``eventstats``: Retains all original events and adds new fields with the calculated statistics. + +* Use Cases + + * ``stats``: Best for creating summary reports or dashboards. Often used as a final command to summarize results. + * ``eventstats``: Useful when you need to enrich events with statistical context for further analysis or filtering. Can be used mid-search to add statistics that can be used in subsequent commands. Syntax ====== eventstats <function>... [by-clause] +* function: mandatory. An aggregation function or window function. +* by-clause: optional. Groups results by specified fields or expressions. Syntax: by [span-expression,] [field,]... **Default:** aggregation over the entire result set. +* span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). For example, ``span(age, 10)`` creates 10-year age buckets, ``span(timestamp, 1h)`` creates hourly buckets.
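As a quick illustration of the numeric span bucketing described in the span-expression parameter above, here is a minimal Java sketch under the assumption that each value is assigned to the start of its fixed-width interval; the class name is hypothetical and this is not the plugin's actual implementation.

.. code-block:: java

    /** Illustrative only: numeric span bucketing as used by span(age, 10). */
    public final class SpanSketch {
      static double bucketStart(double value, double interval) {
        // Each value maps to the lower bound of its interval.
        return Math.floor(value / interval) * interval; // e.g. age 33, interval 10 -> 30
      }

      public static void main(String[] args) {
        System.out.println(bucketStart(33, 10)); // prints 30.0
      }
    }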
- -* Available time unit: -+----------------------------+ -| Span Interval Units | -+============================+ -| millisecond (ms) | -+----------------------------+ -| second (s) | -+----------------------------+ -| minute (m, case sensitive) | -+----------------------------+ -| hour (h) | -+----------------------------+ -| day (d) | -+----------------------------+ -| week (w) | -+----------------------------+ -| month (M, case sensitive) | -+----------------------------+ -| quarter (q) | -+----------------------------+ -| year (y) | -+----------------------------+ + * Available time units: + + * millisecond (ms) + * second (s) + * minute (m, case sensitive) + * hour (h) + * day (d) + * week (w) + * month (M, case sensitive) + * quarter (q) + * year (y) Aggregation Functions ===================== -COUNT ------ - -Description ->>>>>>>>>>> - -Usage: Returns a count of the number of expr in the rows retrieved by a SELECT statement. - -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats count() | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+---------+ - | account_number | gender | age | count() | - |----------------+--------+-----+---------| - | 1 | M | 32 | 4 | - | 6 | M | 36 | 4 | - | 13 | F | 28 | 4 | - | 18 | M | 33 | 4 | - +----------------+--------+-----+---------+ - -SUM ---- - -Description ->>>>>>>>>>> - -Usage: SUM(expr). Returns the sum of expr. - -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats sum(age) by gender | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+----------+ - | account_number | gender | age | sum(age) | - |----------------+--------+-----+----------| - | 1 | M | 32 | 101 | - | 6 | M | 36 | 101 | - | 13 | F | 28 | 28 | - | 18 | M | 33 | 101 | - +----------------+--------+-----+----------+ - -AVG ---- - -Description ->>>>>>>>>>> - -Usage: AVG(expr). Returns the average value of expr. - -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats avg(age) by gender | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+--------------------+ - | account_number | gender | age | avg(age) | - |----------------+--------+-----+--------------------| - | 1 | M | 32 | 33.666666666666664 | - | 6 | M | 36 | 33.666666666666664 | - | 13 | F | 28 | 28.0 | - | 18 | M | 33 | 33.666666666666664 | - +----------------+--------+-----+--------------------+ - -MAX ---- - -Description ->>>>>>>>>>> - -Usage: MAX(expr). Returns the maximum value of expr. - -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats max(age) | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+----------+ - | account_number | gender | age | max(age) | - |----------------+--------+-----+----------| - | 1 | M | 32 | 36 | - | 6 | M | 36 | 36 | - | 13 | F | 28 | 36 | - | 18 | M | 33 | 36 | - +----------------+--------+-----+----------+ - -MIN ---- - -Description ->>>>>>>>>>> - -Usage: MIN(expr). Returns the minimum value of expr. 
- -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats min(age) by gender | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+----------+ - | account_number | gender | age | min(age) | - |----------------+--------+-----+----------| - | 1 | M | 32 | 32 | - | 6 | M | 36 | 32 | - | 13 | F | 28 | 28 | - | 18 | M | 33 | 32 | - +----------------+--------+-----+----------+ - - -VAR_SAMP --------- - -Description ->>>>>>>>>>> -Usage: VAR_SAMP(expr). Returns the sample variance of expr. +The eventstats command supports the following aggregation functions: -Example:: +* COUNT: Count of values +* SUM: Sum of numeric values +* AVG: Average of numeric values +* MAX: Maximum value +* MIN: Minimum value +* VAR_SAMP: Sample variance +* VAR_POP: Population variance +* STDDEV_SAMP: Sample standard deviation +* STDDEV_POP: Population standard deviation +* DISTINCT_COUNT/DC: Distinct count of values +* EARLIEST: Earliest value by timestamp +* LATEST: Latest value by timestamp - os> source=accounts | fields account_number, gender, age | eventstats var_samp(age) | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+--------------------+ - | account_number | gender | age | var_samp(age) | - |----------------+--------+-----+--------------------| - | 1 | M | 32 | 10.916666666666666 | - | 6 | M | 36 | 10.916666666666666 | - | 13 | F | 28 | 10.916666666666666 | - | 18 | M | 33 | 10.916666666666666 | - +----------------+--------+-----+--------------------+ - - -VAR_POP -------- - -Description ->>>>>>>>>>> - -Usage: VAR_POP(expr). Returns the population standard variance of expr. - -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats var_pop(age) | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+--------------+ - | account_number | gender | age | var_pop(age) | - |----------------+--------+-----+--------------| - | 1 | M | 32 | 8.1875 | - | 6 | M | 36 | 8.1875 | - | 13 | F | 28 | 8.1875 | - | 18 | M | 33 | 8.1875 | - +----------------+--------+-----+--------------+ - -STDDEV_SAMP ------------ - -Description ->>>>>>>>>>> - -Usage: STDDEV_SAMP(expr). Return the sample standard deviation of expr. - -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats stddev_samp(age) | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+-------------------+ - | account_number | gender | age | stddev_samp(age) | - |----------------+--------+-----+-------------------| - | 1 | M | 32 | 3.304037933599835 | - | 6 | M | 36 | 3.304037933599835 | - | 13 | F | 28 | 3.304037933599835 | - | 18 | M | 33 | 3.304037933599835 | - +----------------+--------+-----+-------------------+ - - -STDDEV_POP ----------- - -Description ->>>>>>>>>>> - -Usage: STDDEV_POP(expr). Return the population standard deviation of expr. 
- -Example:: - - os> source=accounts | fields account_number, gender, age | eventstats stddev_pop(age) | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+--------------------+ - | account_number | gender | age | stddev_pop(age) | - |----------------+--------+-----+--------------------| - | 1 | M | 32 | 2.8613807855648994 | - | 6 | M | 36 | 2.8613807855648994 | - | 13 | F | 28 | 2.8613807855648994 | - | 18 | M | 33 | 2.8613807855648994 | - +----------------+--------+-----+--------------------+ - - -DISTINCT_COUNT, DC(Since 3.3) ------------------- - -Description ->>>>>>>>>>> - -Usage: DISTINCT_COUNT(expr), DC(expr). Returns the approximate number of distinct values using the HyperLogLog++ algorithm. Both functions are equivalent. - -For details on algorithm accuracy and precision control, see the `OpenSearch Cardinality Aggregation documentation `_. - - -Example:: - - os> source=accounts | fields account_number, gender, state, age | eventstats dc(state) as distinct_states, distinct_count(state) as dc_states_alt by gender | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-------+-----+-----------------+---------------+ - | account_number | gender | state | age | distinct_states | dc_states_alt | - |----------------+--------+-------+-----+-----------------+---------------| - | 1 | M | IL | 32 | 3 | 3 | - | 6 | M | TN | 36 | 3 | 3 | - | 13 | F | VA | 28 | 1 | 1 | - | 18 | M | MD | 33 | 3 | 3 | - +----------------+--------+-------+-----+-----------------+---------------+ - -EARLIEST (Since 3.3) ---------------------- - -Description ->>>>>>>>>>> - -Usage: EARLIEST(field [, time_field]). Return the earliest value of a field based on timestamp ordering. This function enriches each event with the earliest value found within the specified grouping. - -* field: mandatory. The field to return the earliest value for. -* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -Note: This function requires Calcite to be enabled (see `Configuration`_ section above). 
- -Example:: - - os> source=events | fields @timestamp, host, message | eventstats earliest(message) by host | sort @timestamp; - fetched rows / total rows = 8/8 - +---------------------+---------+----------------------+-------------------+ - | @timestamp | host | message | earliest(message) | - |---------------------+---------+----------------------+-------------------| - | 2023-01-01 10:00:00 | server1 | Starting up | Starting up | - | 2023-01-01 10:05:00 | server2 | Initializing | Initializing | - | 2023-01-01 10:10:00 | server1 | Ready to serve | Starting up | - | 2023-01-01 10:15:00 | server2 | Ready | Initializing | - | 2023-01-01 10:20:00 | server1 | Processing requests | Starting up | - | 2023-01-01 10:25:00 | server2 | Handling connections | Initializing | - | 2023-01-01 10:30:00 | server1 | Shutting down | Starting up | - | 2023-01-01 10:35:00 | server2 | Maintenance mode | Initializing | - +---------------------+---------+----------------------+-------------------+ - -Example with custom time field:: - - os> source=events | fields event_time, status, category | eventstats earliest(status, event_time) by category | sort event_time; - fetched rows / total rows = 8/8 - +---------------------+------------+----------+------------------------------+ - | event_time | status | category | earliest(status, event_time) | - |---------------------+------------+----------+------------------------------| - | 2023-01-01 09:55:00 | pending | orders | pending | - | 2023-01-01 10:00:00 | active | users | active | - | 2023-01-01 10:05:00 | processing | orders | pending | - | 2023-01-01 10:10:00 | inactive | users | active | - | 2023-01-01 10:15:00 | completed | orders | pending | - | 2023-01-01 10:20:00 | pending | users | active | - | 2023-01-01 10:25:00 | cancelled | orders | pending | - | 2023-01-01 10:30:00 | inactive | users | active | - +---------------------+------------+----------+------------------------------+ - - -LATEST (Since 3.3) -------------------- - -Description ->>>>>>>>>>> - -Usage: LATEST(field [, time_field]). Return the latest value of a field based on timestamp ordering. This function enriches each event with the latest value found within the specified grouping. - -* field: mandatory. The field to return the latest value for. -* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -Note: This function requires Calcite to be enabled (see `Configuration`_ section above). 
- -Example:: - - os> source=events | fields @timestamp, host, message | eventstats latest(message) by host | sort @timestamp; - fetched rows / total rows = 8/8 - +---------------------+---------+----------------------+------------------+ - | @timestamp | host | message | latest(message) | - |---------------------+---------+----------------------+------------------| - | 2023-01-01 10:00:00 | server1 | Starting up | Shutting down | - | 2023-01-01 10:05:00 | server2 | Initializing | Maintenance mode | - | 2023-01-01 10:10:00 | server1 | Ready to serve | Shutting down | - | 2023-01-01 10:15:00 | server2 | Ready | Maintenance mode | - | 2023-01-01 10:20:00 | server1 | Processing requests | Shutting down | - | 2023-01-01 10:25:00 | server2 | Handling connections | Maintenance mode | - | 2023-01-01 10:30:00 | server1 | Shutting down | Shutting down | - | 2023-01-01 10:35:00 | server2 | Maintenance mode | Maintenance mode | - +---------------------+---------+----------------------+------------------+ - -Example with custom time field:: - - os> source=events | fields event_time, status message, category | eventstats latest(status, event_time) by category | sort event_time; - fetched rows / total rows = 8/8 - +---------------------+------------+----------------------+----------+----------------------------+ - | event_time | status | message | category | latest(status, event_time) | - |---------------------+------------+----------------------+----------+----------------------------| - | 2023-01-01 09:55:00 | pending | Starting up | orders | cancelled | - | 2023-01-01 10:00:00 | active | Initializing | users | inactive | - | 2023-01-01 10:05:00 | processing | Ready to serve | orders | cancelled | - | 2023-01-01 10:10:00 | inactive | Ready | users | inactive | - | 2023-01-01 10:15:00 | completed | Processing requests | orders | cancelled | - | 2023-01-01 10:20:00 | pending | Handling connections | users | inactive | - | 2023-01-01 10:25:00 | cancelled | Shutting down | orders | cancelled | - | 2023-01-01 10:30:00 | inactive | Maintenance mode | users | inactive | - +---------------------+------------+----------------------+----------+----------------------------+ - - -Configuration -============= -This command requires Calcite enabled. - -Enable Calcite:: - - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "transient" : { - "plugins.calcite.enabled" : true - } - }' - -Result set:: - - { - "acknowledged": true, - "persistent": { - "plugins": { - "calcite": { - "enabled": "true" - } - } - }, - "transient": {} - } +For detailed documentation of each function, see `Aggregation Functions <../functions/aggregations.rst>`_. Usage ===== @@ -436,9 +92,9 @@ Eventstats:: Example 1: Calculate the average, sum and count of a field by group -================================================================== +=================================================================== -The example show calculate the average age, sum age and count of events of all the accounts group by gender. +This example shows calculating the average age, sum of age, and count of events for all accounts grouped by gender. PPL query:: @@ -456,7 +112,7 @@ PPL query:: Example 2: Calculate the count by a gender and span =================================================== -The example gets the count of age by the interval of 10 years and group by gender. +This example shows counting events by age intervals of 5 years, grouped by gender. 
PPL query:: diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst index 77061385478..c8065a2da0f 100644 --- a/docs/user/ppl/cmd/expand.rst +++ b/docs/user/ppl/cmd/expand.rst @@ -1,6 +1,6 @@ -============= +====== expand -============= +====== .. rubric:: Table of contents @@ -10,38 +10,27 @@ expand Description -============ -| (Experimental) - -Use the ``expand`` command on a nested array field to transform a single -document into multiple documents—each containing one element from the array. -All other fields in the original document are duplicated across the resulting -documents. +=========== +| The ``expand`` command transforms a single document with a nested array field into multiple documents—each containing one element from the array. All other fields in the original document are duplicated across the resulting documents. -The expand command generates one row per element in the specified array field: +| Key aspects of ``expand``: +* It generates one row per element in the specified array field. * The specified array field is converted into individual rows. -* If an alias is provided, the expanded values appear under the alias instead - of the original field name. -* If the specified field is an empty array, the row is retained with the - expanded field set to null. - -Version -======= -Since 3.1.0 +* If an alias is provided, the expanded values appear under the alias instead of the original field name. +* If the specified field is an empty array, the row is retained with the expanded field set to null. Syntax ====== expand <field> [as alias] -* field: The field to be expanded (exploded). Currently only nested arrays are - supported. -* alias: (Optional) The name to use instead of the original field name. +* field: mandatory. The field to be expanded (exploded). Currently only nested arrays are supported. +* alias: optional. The name to use instead of the original field name. -Example: expand address field with an alias -=========================================== +Example 1: Expand address field with an alias +============================================= Given a dataset ``migration`` with the following data: @@ -65,19 +54,8 @@ PPL query:: +-------+-----+-------------------------------------------------------------------------------------------+ Limitations -============ +=========== * The ``expand`` command currently only supports nested arrays. Primitive fields storing arrays are not supported. E.g. a string field storing an array of strings cannot be expanded with the current implementation. -* The command works only with Calcite enabled. This can be set with the - following command: - - .. code-block:: - - PUT /_cluster/settings - { - "persistent":{ - "plugins.calcite.enabled": true - } - } diff --git a/docs/user/ppl/cmd/explain.rst b/docs/user/ppl/cmd/explain.rst index c06025022e6..c32e83d2d39 100644 --- a/docs/user/ppl/cmd/explain.rst +++ b/docs/user/ppl/cmd/explain.rst @@ -1,6 +1,6 @@ -============= +======= explain -============= +======= .. rubric:: Table of contents @@ -10,25 +10,26 @@ explain Description -============ -| Using ``explain`` command to explain the plan of query which is used very often for query translation and troubleshooting. ``explain`` command could be only used as the first command in the PPL query. - +=========== +The ``explain`` command explains the plan of a query, which is often used for query translation and troubleshooting. The ``explain`` command can only be used as the first command in the PPL query.
Syntax -============ +====== explain queryStatement -* mode: optional. There are 4 explain modes: "simple", "standard", "cost", "extended". If mode is not provided, "standard" will be set by default. - * standard: The default mode. Display logical and physical plan with pushdown information (DSL). - * simple: Display the logical plan tree without attributes. Only works with Calcite. - * cost: Display the standard information plus plan cost attributes. Only works with Calcite. - * extended: Display the standard information plus generated code. Only works with Calcite. +* mode: optional. There are 4 explain modes: "simple", "standard", "cost", "extended". **Default:** standard. + + * standard: The default mode. Display logical and physical plan with pushdown information (DSL). + * simple: Display the logical plan tree without attributes. + * cost: Display the standard information plus plan cost attributes. + * extended: Display the standard information plus generated code. + * queryStatement: mandatory. A PPL query to explain. Example 1: Explain a PPL query in v2 engine -============================== +=========================================== When Calcite is disabled (plugins.calcite.enabled=false), explaining a PPL query will get its physical plan of v2 engine and pushdown information. PPL query:: @@ -56,7 +57,7 @@ Explain:: } Example 2: Explain a PPL query in v3 engine -=================================================== +=========================================== When Calcite is enabled (plugins.calcite.enabled=true), explaining a PPL query will get its logical and physical plan of v3 engine and pushdown information. @@ -81,9 +82,9 @@ Explain:: Example 3: Explain a PPL query with simple mode -========================================================= +=============================================== -When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query will the "simple" mode. +When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query with the "simple" mode. PPL query:: @@ -102,9 +103,9 @@ Explain:: } Example 4: Explain a PPL query with cost mode -========================================================= +============================================= -When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query will the "cost" mode. +When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query with the "cost" mode. PPL query:: @@ -126,9 +127,7 @@ Explain:: } Example 5: Explain a PPL query with extended mode -========================================================= - -When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query will the "extended" mode. +================================================= PPL query:: diff --git a/docs/user/ppl/cmd/fields.rst b/docs/user/ppl/cmd/fields.rst index 18aeaff4d06..81ccff71b80 100644 --- a/docs/user/ppl/cmd/fields.rst +++ b/docs/user/ppl/cmd/fields.rst @@ -1,6 +1,6 @@ -============= +====== fields -============= +====== .. rubric:: Table of contents @@ -10,26 +10,20 @@ fields Description -============ -Using ``field`` command to keep or remove fields from the search result. - -Enhanced field features are available when the Calcite engine is enabled with 3.3+ version. When Calcite is disabled, only basic comma-delimited field selection is supported. +=========== +The ``fields`` command keeps or removes fields from the search result. Syntax -============ -field [+|-] <field-list> - -* index: optional. 
if the plus (+) is used, only the fields specified in the field list will be keep. if the minus (-) is used, all the fields specified in the field list will be removed. **Default** + -* field list: mandatory. comma-delimited keep or remove fields. - +====== +fields [+|-] <field-list> -Basic Examples -============== +* +|-: optional. If the plus (+) is used, only the fields specified in the field list will be kept. If the minus (-) is used, all the fields specified in the field list will be removed. **Default:** +. +* field-list: mandatory. Comma-delimited or space-delimited list of fields to keep or remove. Supports wildcard patterns. Example 1: Select specified fields from result ----------------------------------------------- +============================================== -The example show fetch account_number, firstname and lastname fields from search results. +This example shows selecting account_number, firstname and lastname fields from search results. PPL query:: @@ -45,9 +39,9 @@ PPL query:: +----------------+-----------+----------+ Example 2: Remove specified fields from result ----------------------------------------------- +============================================== -The example show fetch remove account_number field from search results. +This example shows removing the account_number field from search results. PPL query:: @@ -62,13 +56,8 @@ PPL query:: | Dale | Adams | +-----------+----------+ -Enhanced Features (Version 3.3.0) -=========================================== - -All features in this section require the Calcite engine to be enabled. When Calcite is disabled, only basic comma-delimited field selection is supported. - Example 3: Space-delimited field selection ------------------------------------------- +========================================== Fields can be specified using spaces instead of commas, providing a more concise syntax. @@ -88,7 +77,7 @@ PPL query:: +-----------+----------+-----+ Example 4: Prefix wildcard pattern ------------------------------------ +================================== Select fields starting with a pattern using prefix wildcards. @@ -106,7 +95,7 @@ PPL query:: +----------------+ Example 5: Suffix wildcard pattern ------------------------------------ +================================== Select fields ending with a pattern using suffix wildcards. @@ -124,7 +113,7 @@ PPL query:: +-----------+----------+ Example 6: Contains wildcard pattern ------------------------------------- +==================================== Select fields containing a pattern using contains wildcards. @@ -139,7 +128,7 @@ PPL query:: +----------------+-----------+-----------------+---------+-------+-----+----------------------+----------+ Example 7: Mixed delimiter syntax ----------------------------------- +================================= Combine spaces and commas for flexible field specification. @@ -157,7 +146,7 @@ PPL query:: +-----------+----------------+----------+ Example 8: Field deduplication -------------------------------- +============================== Automatically prevents duplicate columns when wildcards expand to already specified fields. @@ -177,7 +166,7 @@ PPL query:: Note: Even though ``firstname`` is explicitly specified and would also match ``*name``, it appears only once due to automatic deduplication. Example 9: Full wildcard selection ------------------------------------ +================================== Select all available fields using ``*`` or ```*```. 
This selects all fields defined in the index schema, including fields that may contain null values. @@ -194,7 +183,7 @@ PPL query:: Note: The ``*`` wildcard selects fields based on the index schema, not on data content. Fields with null values are included in the result set. Use backticks ```*``` if the plain ``*`` doesn't return all expected fields. Example 10: Wildcard exclusion -------------------------------- +============================== Remove fields using wildcard patterns with the minus (-) operator. @@ -211,11 +200,6 @@ PPL query:: | 18 | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | +----------------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+ -Requirements -============ -- **Calcite Engine**: All enhanced features require the Calcite engine to be enabled -- **Backward Compatibility**: Basic comma-delimited syntax continues to work when Calcite is disabled -- **Error Handling**: Attempting to use enhanced features without Calcite will result in an ``UnsupportedOperationException`` See Also ======== diff --git a/docs/user/ppl/cmd/fillnull.rst b/docs/user/ppl/cmd/fillnull.rst index 483755f723f..7ebceee019a 100644 --- a/docs/user/ppl/cmd/fillnull.rst +++ b/docs/user/ppl/cmd/fillnull.rst @@ -1,6 +1,6 @@ -============= +======== fillnull -============= +======== .. rubric:: Table of contents @@ -10,39 +10,31 @@ fillnull Description -============ -Using ``fillnull`` command to fill null with provided value in one or more fields in the search result. +=========== +| The ``fillnull`` command fills null values with the provided value in one or more fields in the search result. Syntax -============ +====== -fillnull with <replacement> [in <field-list>] +| fillnull with <replacement> [in <field-list>] +| fillnull using <field> = <replacement> [, <field> = <replacement>] +| fillnull value=<replacement> [<field-list>] -fillnull using <field> = <replacement> [, <field> = <replacement>] +* replacement: mandatory. The value used to replace null values. +* field-list: optional. List of fields to apply the replacement to. Can be comma-delimited (with ``with`` or ``using`` syntax) or space-delimited (with ``value=`` syntax). **Default:** all fields. +* field: mandatory when using ``using`` syntax. Individual field name to assign a specific replacement value. -fillnull value=<replacement> [<field-list>] +* **Syntax variations** - -Parameters -============ - -* replacement: Mandatory. The value used to replace `null`s. - -* field-list: Optional. Comma-delimited (when using ``with`` or ``using``) or space-delimited (when using ``value=``) list of fields. The `null` values in the field will be replaced with the values from the replacement. **Default:** If no field specified, the replacement is applied to all fields. - -**Syntax Variations:** - -* ``with <replacement> in <field-list>`` - Apply same value to specified fields -* ``using <field>=<replacement>, ...`` - Apply different values to different fields -* ``value=<replacement> [<field-list>]`` - Alternative syntax with optional space-delimited field list - - -Examples -============ + * ``with <replacement> in <field-list>`` - Apply same value to specified fields + * ``using <field>=<replacement>, ...`` - Apply different values to different fields + * ``value=<replacement> [<field-list>]`` - Alternative syntax with optional space-delimited field list Example 1: Replace null values with a specified value on one field -------------------------------------------------------------------- +================================================================== + +This example shows replacing null values in the email field with ''. 
PPL query:: @@ -58,7 +50,9 @@ PPL query:: +-----------------------+----------+ Example 2: Replace null values with a specified value on multiple fields -------------------------------------------------------------------------- +======================================================================== + +This example shows replacing null values in both email and employer fields with the same replacement value ''. PPL query:: @@ -74,7 +68,9 @@ PPL query:: +-----------------------+-------------+ Example 3: Replace null values with a specified value on all fields --------------------------------------------------------------------- +=================================================================== + +This example shows replacing null values in all fields when no field list is specified. PPL query:: @@ -90,7 +86,9 @@ PPL query:: +-----------------------+-------------+ Example 4: Replace null values with multiple specified values on multiple fields ---------------------------------------------------------------------------------- +================================================================================ + +This example shows using different replacement values for different fields using the 'using' syntax. PPL query:: @@ -107,7 +105,9 @@ PPL query:: Example 5: Replace null with specified value on specific fields (value= syntax) --------------------------------------------------------------------------------- +=============================================================================== + +This example shows using the alternative 'value=' syntax to replace null values in specific fields. PPL query:: @@ -123,7 +123,7 @@ PPL query:: +-----------------------+-------------+ Example 6: Replace null with specified value on all fields (value= syntax) ---------------------------------------------------------------------------- +========================================================================== When no field list is specified, the replacement applies to all fields in the result. @@ -141,7 +141,7 @@ PPL query:: +-----------------------+-------------+ Limitations -============ +=========== * The ``fillnull`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. * When applying the same value to all fields without specifying field names, all fields must be the same type. For mixed types, use separate fillnull commands or explicitly specify fields. * The replacement value type must match ALL field types in the field list. When applying the same value to multiple fields, all fields must be the same type (all strings or all numeric). diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst index 3c1780531f1..e366fe32daa 100644 --- a/docs/user/ppl/cmd/flatten.rst +++ b/docs/user/ppl/cmd/flatten.rst @@ -1,6 +1,6 @@ -============= +======= flatten -============= +======= .. rubric:: Table of contents @@ -10,42 +10,25 @@ flatten Description =========== +| The ``flatten`` command flattens a struct or an object field into separate fields in a document. -Use ``flatten`` command to flatten a struct or an object field into separate -fields in a document. - -The flattened fields will be ordered **lexicographically** by their original -key names in the struct. I.e. if the struct has keys ``b``, ``c`` and ``Z``, -the flattened fields will be ordered as ``Z``, ``b``, ``c``. +| The flattened fields will be ordered **lexicographically** by their original key names in the struct. 
For example, if the struct has keys ``b``, ``c`` and ``Z``, the flattened fields will be ordered as ``Z``, ``b``, ``c``. -Note that ``flatten`` should not be applied to arrays. Please use ``expand`` -command to expand an array field into multiple rows instead. However, since -an array can be stored in a non-array field in OpenSearch, when expanding a -field storing a nested array, only the first element of the array will be -flattened. - -Version -======= -3.1.0 +| Note that ``flatten`` should not be applied to arrays. Use the ``expand`` command to expand an array field into multiple rows instead. However, since an array can be stored in a non-array field in OpenSearch, when flattening a field storing a nested array, only the first element of the array will be flattened. Syntax ====== flatten <field> [as (<alias-list>)] -* field: The field to be flattened. Only object and nested fields are - supported. -* alias-list: (Optional) The names to use instead of the original key names. Names are separated by commas. It is advised to put the alias-list in - parentheses if there is more than one alias. E.g. both - ``country, state, city`` and ``(country, state, city)`` are supported, - but the latter is advised. Its length must match the number of keys in the - struct field. Please note that the provided alias names **must** follow - the lexicographical order of the corresponding original keys in the struct. +* field: mandatory. The field to be flattened. Only object and nested fields are supported. +* alias-list: optional. The names to use instead of the original key names. Names are separated by commas. It is advised to put the alias-list in parentheses if there is more than one alias. The length must match the number of keys in the struct field. The provided alias names **must** follow the lexicographical order of the corresponding original keys in the struct. Example: flatten an object field with aliases ============================================= +This example shows flattening a message object field and using aliases to rename the flattened fields. + Given the following index ``my-index`` .. code-block:: @@ -116,15 +99,3 @@ Limitations invisible. As an alternative, you can change to ``source=my-index | flatten message``. - -* The command works only with Calcite enabled. This can be set with the - following command: - - .. code-block:: - - PUT /_cluster/settings - { - "persistent":{ - "plugins.calcite.enabled": true - } - } diff --git a/docs/user/ppl/cmd/grok.rst b/docs/user/ppl/cmd/grok.rst index 35f3b0c8461..836d01b6a89 100644 --- a/docs/user/ppl/cmd/grok.rst +++ b/docs/user/ppl/cmd/grok.rst @@ -1,6 +1,6 @@ -============= +==== grok -============= +==== .. rubric:: Table of contents @@ -10,26 +10,20 @@ grok Description -============ -| The ``grok`` command parses a text field with a grok pattern and appends the results to the search result. - +=========== +The ``grok`` command parses a text field with a grok pattern and appends the results to the search result. Syntax -============ +====== grok <field> <pattern> * field: mandatory. The field must be a text field. -* pattern: mandatory string. The grok pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field. - -Grok Pattern -============ - -The grok pattern is used to match the text field of each document to extract new fields. +* pattern: mandatory. The grok pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field.
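Conceptually, a grok pattern expands to a regular expression with named capture groups, one per extracted field. The following is a minimal, illustrative Java sketch of that idea, assuming a hypothetical expansion of ``%{USERNAME:user}@%{HOSTNAME:host}`` into plain regex groups; it is not the plugin's actual grok compiler, and the character classes are simplified.

.. code-block:: java

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /** Illustrative only: a grok-style pattern behaves like a named-group regex. */
    public final class GrokSketch {
      public static void main(String[] args) {
        // Simplified stand-in for the expansion of "%{USERNAME:user}@%{HOSTNAME:host}".
        Pattern p = Pattern.compile("(?<user>[a-zA-Z0-9._-]+)@(?<host>[a-zA-Z0-9.-]+)");
        Matcher m = p.matcher("amberduke@pyrami.com");
        if (m.matches()) {
          System.out.println(m.group("host")); // prints "pyrami.com"
        }
      }
    }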
Example 1: Create the new field =============================== -The example shows how to create new field ``host`` for each document. ``host`` will be the host name after ``@`` in ``email`` field. Parsing a null field will return an empty string. +This example shows how to create a new field ``host`` for each document. ``host`` will be the host name after ``@`` in the ``email`` field. Parsing a null field will return an empty string. PPL query:: @@ -48,7 +42,7 @@ PPL query:: Example 2: Override the existing field ====================================== -The example shows how to override the existing ``address`` field with street number removed. +This example shows how to override the existing ``address`` field with the street number removed. PPL query:: @@ -66,7 +60,7 @@ PPL query:: Example 3: Using grok to parse logs =================================== -The example shows how to use grok to parse raw logs. +This example shows how to use grok to parse raw logs. PPL query:: diff --git a/docs/user/ppl/cmd/head.rst b/docs/user/ppl/cmd/head.rst index c13a495a77f..a17f283026d 100644 --- a/docs/user/ppl/cmd/head.rst +++ b/docs/user/ppl/cmd/head.rst @@ -1,6 +1,6 @@ -============= +==== head -============= +==== .. rubric:: Table of contents @@ -10,21 +10,20 @@ head Description -============ -| The ``head`` command returns the first N number of specified results after an optional offset in search order. - +=========== +The ``head`` command returns the first N results after an optional offset in search order. Syntax -============ +====== head [] [from ] -* : optional integer. number of results to return. **Default:** 10 -* : integer after optional ``from``. number of results to skip. **Default:** 0 +* size: optional integer. Number of results to return. **Default:** 10 +* offset: optional integer after ``from``. Number of results to skip. **Default:** 0 Example 1: Get first 10 results -=========================================== +=============================== -The example show maximum 10 results from accounts index. +This example shows getting a maximum of 10 results from the accounts index. PPL query:: @@ -40,9 +39,9 @@ PPL query:: +-----------+-----+ Example 2: Get first N results -=========================================== +============================== -The example show first N results from accounts index. +This example shows getting the first 3 results from the accounts index. PPL query:: @@ -59,7 +58,7 @@ PPL query:: Example 3: Get first N results after offset M ============================================= -The example show first N results after offset M from accounts index. +This example shows getting the first 3 results after offset 1 from the accounts index. PPL query:: diff --git a/docs/user/ppl/cmd/join.rst b/docs/user/ppl/cmd/join.rst index 3b986071261..61dfc31042d 100644 --- a/docs/user/ppl/cmd/join.rst +++ b/docs/user/ppl/cmd/join.rst @@ -1,6 +1,6 @@ -============= +==== join -============= +==== .. rubric:: Table of contents @@ -11,63 +11,39 @@ join Description =========== -| Using ``join`` command to combines two datasets together. The left side could be an index or results from a piped commands, the right side could be either an index or a subsearch. +| The ``join`` command combines two datasets. The left side can be an index or the results of piped commands; the right side can be either an index or a subsearch. 
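+ +A minimal sketch of the command shape (``table1``, ``table2``, and the field names are illustrative; see the Usage and Examples sections below for runnable queries):: + +    source = table1 | inner join left = l right = r on l.id = r.id table2 | fields l.id, r.name +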
-Version -======= -3.0.0 +Syntax +====== -Basic syntax in 3.0.0 -===================== -| [joinType] join [leftAlias] [rightAlias] (on | where) +Basic syntax: +------------- -* joinType: optional. The type of join to perform. The default is ``inner`` if not specified. Other option is ``left``, ``semi``, ``anti`` and performance sensitive types ``right``, ``full`` and ``cross``. -* leftAlias: optional. The subsearch alias to use with the left join side, to avoid ambiguous naming. Fixed pattern: ``left = `` -* rightAlias: optional. The subsearch alias to use with the right join side, to avoid ambiguous naming. Fixed pattern: ``right = `` -* joinCriteria: mandatory. It could be any comparison expression. Must follow with ``on`` (since 3.0.0) or ``where`` (since 3.3.0) keyword. +[joinType] join [leftAlias] [rightAlias] (on | where) + +* joinType: optional. The type of join to perform. Options: ``left``, ``semi``, ``anti``, and performance-sensitive types ``right``, ``full``, ``cross``. **Default:** ``inner``. +* leftAlias: optional. The subsearch alias to use with the left join side, to avoid ambiguous naming. Pattern: ``left = `` +* rightAlias: optional. The subsearch alias to use with the right join side, to avoid ambiguous naming. Pattern: ``right = `` +* joinCriteria: mandatory. Any comparison expression. Must follow the ``on`` or ``where`` keyword. * right-dataset: mandatory. Right dataset could be either an ``index`` or a ``subsearch`` with/without alias. -Extended syntax since 3.3.0 -=========================== -| join [type=] [overwrite=] [max=n] ( | [leftAlias] [rightAlias] (on | where) ) -| From 3.3.0, the join syntax is enhanced to support more join options and join with field list. +Extended syntax: +---------------- -* type=: optional. The type of join to perform. The default is ``inner`` if not specified. Other option is ``left``, ``outer``(alias of ``left``), ``semi``, ``anti`` and performance sensitive types ``right``, ``full`` and ``cross``. -* overwrite=: optional. Only works with ``join-field-list``. Specifies whether duplicate-named fields from (subsearch results) should replace corresponding fields in the main search results. The default value is ``true``. -* max=n: optional. Controls how many subsearch results could be joined against to each row in main search. The default value is 0, means unlimited. -* join-field-list: optional. The fields used to build the join criteria. The join field list must exist on both sides. If no join field list is specified, all fields common to both sides will be used as join keys. The comma is optional. +join [type=] [overwrite=] [max=n] ( | [leftAlias] [rightAlias] (on | where) ) + +* type: optional. Join type using extended syntax. Options: ``left``, ``outer`` (alias of ``left``), ``semi``, ``anti``, and performance-sensitive types ``right``, ``full``, ``cross``. **Default:** ``inner``. +* overwrite: optional boolean. Only works with ``join-field-list``. Specifies whether duplicate-named fields from right-dataset should replace corresponding fields in the main search results. **Default:** ``true``. +* max: optional integer. Controls how many subsearch results can be joined against each row in the main search. **Default:** 0 (unlimited). +* join-field-list: optional. The fields used to build the join criteria. The join field list must exist on both sides. If not specified, all fields common to both sides will be used as join keys. +* leftAlias: optional. Same as basic syntax when used with extended syntax. +* rightAlias: optional. 
Same as basic syntax when used with extended syntax. +* joinCriteria: mandatory. Same as basic syntax when used with extended syntax. +* right-dataset: mandatory. Same as basic syntax. Configuration ============= -plugins.calcite.enabled ----------------------- - -This command requires Calcite enabled. In 3.0.0, as an experimental the Calcite configuration is disabled by default. - -Enable Calcite:: - - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "transient" : { - "plugins.calcite.enabled" : true - } - }' - -Result set:: - - { - "acknowledged": true, - "persistent": { - "plugins": { - "calcite": { - "enabled": "true" - } - } - }, - "transient": {} - } - - plugins.ppl.join.subsearch_maxout --------------------------------- @@ -96,7 +72,7 @@ Change the join.subsearch_maxout to 5000:: Usage ===== -Join on criteria (in 3.0.0):: +Basic join syntax:: source = table1 | inner join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c source = table1 | inner join left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c @@ -113,7 +89,7 @@ Join on criteria (in 3.0.0):: source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields t1.a, t2.a source = table1 | join left = l right = r on l.a = r.a [ source = table2 ] as s | fields l.a, s.a -Extended syntax and option supported (since 3.3.0):: +Extended syntax with options:: source = table1 | join type=outer left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c source = table1 | join type=left left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c @@ -127,6 +103,8 @@ Extended syntax and option supported (since 3.3.0):: Example 1: Two indices join =========================== +This example shows joining two indices using the basic join syntax. + PPL query:: os> source = state_country | inner join left=a right=b ON a.name = b.name occupation | stats avg(salary) by span(age, 10) as age_span, b.country; @@ -144,6 +122,8 @@ PPL query:: Example 2: Join with subsearch ============================== +This example shows joining with a subsearch using the basic join syntax. + PPL query:: PPL> source = state_country as a | where country = 'USA' OR country = 'England' | left join ON a.name = b.name [ source = occupation | where salary > 0 | fields name, country, salary | sort salary | head 3 ] as b | stats avg(salary) by span(age, 10) as age_span, b.country; @@ -159,6 +139,8 @@ PPL query:: Example 3: Join with field list =============================== +This example shows joining using the extended syntax with a field list. + PPL query:: PPL> source = state_country | where country = 'USA' OR country = 'England' | join type=left overwrite=true name [ source = occupation | where salary > 0 | fields name, country, salary | sort salary | head 3 ] | stats avg(salary) by span(age, 10) as age_span, country; @@ -174,6 +156,8 @@ PPL query:: Example 4: Join with options ============================ +This example shows joining using the extended syntax with additional options. + PPL query:: os> source = state_country | join type=inner overwrite=false max=1 name occupation | stats avg(salary) by span(age, 10) as age_span, country; @@ -189,7 +173,7 @@ PPL query:: Limitations =========== -For basic syntax in 3.0.0, if fields in the left outputs and right outputs have the same name. +For basic syntax, fields in the left and right outputs may have the same name. 
Typically, in the join criteria ``ON t1.id = t2.id``, the names ``id`` in output are ambiguous. To avoid ambiguous, the ambiguous fields in output rename to ``.id``, or else ``.id`` if no alias existing. @@ -210,6 +194,5 @@ Assume table1 and table2 only contain field ``id``, following PPL queries and th * - source=table1 | join right=tt on table1.id=t2.id [ source=table2 as t2 | eval b = id ] | eval a = 1 - table1.id, tt.id, tt.b, a -For extended syntax (join with field list) in 3.3.0, when duplicate-named fields in output results are deduplicated, the fields in output determined by the value of 'overwrite' option. - -Since 3.3.0, join types ``inner``, ``left``, ``outer`` (alias of ``left``), ``semi`` and ``anti`` are supported by default. ``right``, ``full``, ``cross`` are performance sensitive join types which are disabled by default. Set config ``plugins.calcite.all_join_types.allowed = true`` to enable. +| For extended syntax (join with field list), when duplicate-named fields in output results are deduplicated, the fields in the output are determined by the value of the 'overwrite' option. +| Join types ``inner``, ``left``, ``outer`` (alias of ``left``), ``semi`` and ``anti`` are supported by default. ``right``, ``full``, ``cross`` are performance-sensitive join types that are disabled by default. Set config ``plugins.calcite.all_join_types.allowed = true`` to enable them. diff --git a/docs/user/ppl/cmd/kmeans.rst b/docs/user/ppl/cmd/kmeans.rst index 6d558248ee4..ca4ba255c7e 100644 --- a/docs/user/ppl/cmd/kmeans.rst +++ b/docs/user/ppl/cmd/kmeans.rst @@ -13,20 +13,19 @@ Description =========== | The ``kmeans`` command applies the kmeans algorithm in the ml-commons plugin on the search result returned by a PPL command. - Syntax ====== kmeans -* centroids: optional. The number of clusters you want to group your data points into. The default value is 2. -* iterations: optional. Number of iterations. The default value is 10. -* distance_type: optional. The distance type can be COSINE, L1, or EUCLIDEAN, The default type is EUCLIDEAN. +* centroids: optional. The number of clusters you want to group your data points into. **Default:** 2. +* iterations: optional. Number of iterations. **Default:** 10. +* distance_type: optional. The distance type can be COSINE, L1, or EUCLIDEAN. **Default:** EUCLIDEAN. Example: Clustering of Iris Dataset =================================== -The example shows how to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals. +This example shows how to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals. PPL query:: @@ -42,5 +41,4 @@ PPL query:: Limitations =========== -The ``kmeans`` command can only work with ``plugins.calcite.enabled=false``. -It means ``kmeans`` command cannot work together with new PPL commands/functions introduced in 3.0.0 and above. \ No newline at end of file +The ``kmeans`` command can only work with ``plugins.calcite.enabled=false``. \ No newline at end of file diff --git a/docs/user/ppl/cmd/lookup.rst b/docs/user/ppl/cmd/lookup.rst index dfa093c117b..4d4cf84a48b 100644 --- a/docs/user/ppl/cmd/lookup.rst +++ b/docs/user/ppl/cmd/lookup.rst @@ -1,6 +1,6 @@ -============= +====== lookup -============= +====== ..
rubric:: Table of contents @@ -10,54 +10,19 @@ lookup Description -============ -| (Experimental) -| (From 3.0.0) -| Lookup command enriches your search data by adding or replacing data from a lookup index (dimension table). -You can extend fields of an index with values from a dimension table, append or replace values when lookup condition is matched. -As an alternative of join command, lookup command is more suitable for enriching the source data with a static dataset. - -Version -======= -3.0.0 +=========== +| The ``lookup`` command enriches your search data by adding or replacing data from a lookup index (dimension table). You can extend the fields of an index with values from a dimension table, and append or replace values when the lookup condition is matched. As an alternative to the ``join`` command, the ``lookup`` command is more suitable for enriching the source data with a static dataset. Syntax ====== -LOOKUP ( [AS ])... [(REPLACE | APPEND) ( [AS ])...] +lookup ( [as ])... [(replace | append) ( [as ])...] * lookupIndex: mandatory. The name of lookup index (dimension table). -* lookupMappingField: mandatory. A mapping key in \, analogy to a join key from right table. You can specify multiple \ with comma-delimited. -* sourceMappingField: optional. A mapping key from source (left side), analogy to a join key from left side. If you don't specify any \, its default value is \. -* inputField: optional. A field in \ where matched values are applied to result output. You can specify multiple \ with comma-delimited. If you don't specify any \, all fields expect \ from \ where matched values are applied to result output. -* outputField: optional. A field of output. You can specify zero or multiple \. If you specify \ with an existing field name in source query, its values will be replaced or appended by matched values from \. If the field specified in \ is a new field, in REPLACE strategy, an extended new field will be applied to the results, but fail in APPEND strategy. -* REPLACE | APPEND: optional. The output strategies. Default is REPLACE. If you specify REPLACE, matched values in \ field overwrite the values in result. If you specify APPEND, matched values in \ field only append to the missing values in result. - -Configuration -============= -This command requires Calcite enabled. In 3.0.0-beta, as an experimental the Calcite configuration is disabled by default. - -Enable Calcite:: - - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "transient" : { - "plugins.calcite.enabled" : true - } - }' - -Result set:: - - { - "acknowledged": true, - "persistent": { - "plugins": { - "calcite": { - "enabled": "true" - } - } - }, - "transient": {} - } - +* lookupMappingField: mandatory. A mapping key in ``lookupIndex``, analogous to a join key from the right table. You can specify multiple ``lookupMappingField`` values, comma-delimited. +* sourceMappingField: optional. A mapping key from the source (left side), analogous to a join key from the left side. If not specified, defaults to ``lookupMappingField``. +* inputField: optional. A field in ``lookupIndex`` where matched values are applied to the result output. You can specify multiple ``inputField`` values, comma-delimited. If not specified, all fields except ``lookupMappingField`` from ``lookupIndex`` are applied to the result output. +* outputField: optional. A field of the output. You can specify zero or multiple ``outputField`` values. 
If ``outputField`` matches an existing field name in the source query, its values will be replaced or appended by matched values from ``inputField``. If the field specified in ``outputField`` is a new field, in the replace strategy, an extended new field will be applied to the results, but it will fail in the append strategy. +* replace | append: optional. The output strategy. With replace, matched values in the ``lookupIndex`` field overwrite the values in the result. With append, matched values in the ``lookupIndex`` field only fill in the missing values in the result. **Default:** replace. Usage ===== @@ -73,8 +38,10 @@ Lookup:: source = table1 | lookup table2 id as cid, name append dept as department, city as location -Example 1: replace -================== +Example 1: Replace strategy +=========================== + +This example shows using the lookup command with the REPLACE strategy to overwrite existing values. PPL query:: @@ -169,8 +136,10 @@ Result set:: "size": 6 } -Example 2: append -================= +Example 2: Append strategy +========================== + +This example shows using the lookup command with the APPEND strategy to fill missing values only. PPL query:: @@ -183,8 +152,10 @@ PPL query:: }' -Example 3: no inputField -======================== +Example 3: No inputField specified +================================== + +This example shows using the lookup command without specifying inputField, which applies all fields from the lookup index. PPL query:: @@ -279,9 +250,11 @@ Result set:: "size": 6 } -Example 4: outputField as a new field +Example 4: OutputField as a new field ===================================== +This example shows using the lookup command with outputField as a new field name. + PPL query:: >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ diff --git a/docs/user/ppl/cmd/ml.rst b/docs/user/ppl/cmd/ml.rst index f38697adbbc..371df4de880 100644 --- a/docs/user/ppl/cmd/ml.rst +++ b/docs/user/ppl/cmd/ml.rst @@ -10,47 +10,53 @@ ml Description -============ -| The ``ml`` command is to train/predict/trainandpredict on any algorithm in the ml-commons plugin on the search result returned by a PPL command. - +=========== +| Use the ``ml`` command to train, predict, or train-and-predict with any algorithm in the ml-commons plugin on the search result returned by a PPL command. -List of algorithms supported -============ -AD(RCF) -KMEANS +Syntax +====== +AD - Fixed In Time RCF For Time-series Data: +-------------------------------------------- -AD - Fixed In Time RCF For Time-series Data Command Syntax -===================================================== ml action='train' algorithm='rcf' -* number_of_trees(integer): optional. Number of trees in the forest. The default value is 30. -* shingle_size(integer): optional. A shingle is a consecutive sequence of the most recent records. The default value is 8. -* sample_size(integer): optional. The sample size used by stream samplers in this forest. The default value is 256. -* output_after(integer): optional. The number of points required by stream samplers before results are returned. The default value is 32. -* time_decay(double): optional. The decay factor used by stream samplers in this forest. The default value is 0.0001. -* anomaly_rate(double): optional. The anomaly rate. The default value is 0.005. -* time_field(string): mandatory. It specifies the time field for RCF to use as time-series data. -* date_format(string): optional. It's used for formatting time_field field. The default formatting is "yyyy-MM-dd HH:mm:ss". 
-* time_zone(string): optional. It's used for setting time zone for time_field filed. The default time zone is UTC. -* category_field(string): optional. It specifies the category field used to group inputs. Each category will be independently predicted. +* number_of_trees: optional integer. Number of trees in the forest. **Default:** 30. +* shingle_size: optional integer. A shingle is a consecutive sequence of the most recent records. **Default:** 8. +* sample_size: optional integer. The sample size used by stream samplers in this forest. **Default:** 256. +* output_after: optional integer. The number of points required by stream samplers before results are returned. **Default:** 32. +* time_decay: optional double. The decay factor used by stream samplers in this forest. **Default:** 0.0001. +* anomaly_rate: optional double. The anomaly rate. **Default:** 0.005. +* time_field: mandatory string. It specifies the time field for RCF to use as time-series data. +* date_format: optional string. It's used for formatting time_field field. **Default:** "yyyy-MM-dd HH:mm:ss". +* time_zone: optional string. It's used for setting time zone for time_field field. **Default:** UTC. +* category_field: optional string. It specifies the category field used to group inputs. Each category will be independently predicted. +AD - Batch RCF for Non-time-series Data: +---------------------------------------- -AD - Batch RCF for Non-time-series Data Command Syntax -================================================= ml action='train' algorithm='rcf' -* number_of_trees(integer): optional. Number of trees in the forest. The default value is 30. -* sample_size(integer): optional. Number of random samples given to each tree from the training data set. The default value is 256. -* output_after(integer): optional. The number of points required by stream samplers before results are returned. The default value is 32. -* training_data_size(integer): optional. The default value is the size of your training data set. -* anomaly_score_threshold(double): optional. The threshold of anomaly score. The default value is 1.0. -* category_field(string): optional. It specifies the category field used to group inputs. Each category will be independently predicted. +* number_of_trees: optional integer. Number of trees in the forest. **Default:** 30. +* sample_size: optional integer. Number of random samples given to each tree from the training data set. **Default:** 256. +* output_after: optional integer. The number of points required by stream samplers before results are returned. **Default:** 32. +* training_data_size: optional integer. **Default:** size of your training data set. +* anomaly_score_threshold: optional double. The threshold of anomaly score. **Default:** 1.0. +* category_field: optional string. It specifies the category field used to group inputs. Each category will be independently predicted. + +KMEANS: +------- + +ml action='train' algorithm='kmeans' + +* centroids: optional integer. The number of clusters you want to group your data points into. **Default:** 2. +* iterations: optional integer. Number of iterations. **Default:** 10. +* distance_type: optional string. The distance type can be COSINE, L1, or EUCLIDEAN. **Default:** EUCLIDEAN. 
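+ +A minimal invocation sketch (the ``iris_data`` index and ``centroids=3`` value are illustrative; see Example 5 below for the full clustering example):: + +    source=iris_data | ml action='train' algorithm='kmeans' centroids=3 +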
Example 1: Detecting events in New York City from taxi ridership data with time-series data =========================================================================================== -The example trains an RCF model and uses the model to detect anomalies in the time-series ridership data. +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data. PPL query:: @@ -65,7 +71,7 @@ PPL query:: Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category ============================================================================================================================ -The example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. PPL query:: @@ -82,7 +88,7 @@ PPL query:: Example 3: Detecting events in New York City from taxi ridership data with non-time-series data =============================================================================================== -The example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. PPL query:: @@ -97,7 +103,7 @@ PPL query:: Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category ================================================================================================================================ -The example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. PPL query:: @@ -110,19 +116,10 @@ PPL query:: | day | 6526.0 | 0.0 | False | +----------+---------+-------+-----------+ -KMEANS -====== -ml action='train' algorithm='kmeans' - -* centroids: optional. The number of clusters you want to group your data points into. The default value is 2. -* iterations: optional. Number of iterations. The default value is 10. -* distance_type: optional. The distance type can be COSINE, L1, or EUCLIDEAN, The default type is EUCLIDEAN. - - -Example: Clustering of Iris Dataset -=================================== +Example 5: KMEANS - Clustering of Iris Dataset +=============================================== -The example shows how to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals. +This example shows how to use KMEANS to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals. PPL query:: @@ -139,4 +136,3 @@ PPL query:: Limitations =========== The ``ml`` command can only work with ``plugins.calcite.enabled=false``. -It means ``ml`` command cannot work together with new PPL commands/functions introduced in 3.0.0 and above. 
diff --git a/docs/user/ppl/cmd/multisearch.rst b/docs/user/ppl/cmd/multisearch.rst index 2bac577ef23..ed1e092c8af 100644 --- a/docs/user/ppl/cmd/multisearch.rst +++ b/docs/user/ppl/cmd/multisearch.rst @@ -1,6 +1,6 @@ -============= +=========== multisearch -============= +=========== .. rubric:: Table of contents @@ -10,9 +10,8 @@ multisearch Description -============ -| (Experimental) -| Using ``multisearch`` command to run multiple search subsearches and merge their results together. The command allows you to combine data from different queries on the same or different sources, and optionally apply subsequent processing to the combined result set. +=========== +| Use the ``multisearch`` command to run multiple search subsearches and merge their results together. The command allows you to combine data from different queries on the same or different sources, and optionally apply subsequent processing to the combined result set. | Key aspects of ``multisearch``: @@ -32,30 +31,10 @@ Description Syntax ====== -| multisearch ... - -**Requirements:** - -* **Minimum 2 subsearches required** - multisearch must contain at least two subsearch blocks -* **Maximum unlimited** - you can specify as many subsearches as needed - -**Subsearch Format:** - -* Each subsearch must be enclosed in square brackets: ``[search ...]`` -* Each subsearch must start with the ``search`` keyword -* Syntax: ``[search source=index | commands...]`` -* Description: Each subsearch is a complete search pipeline enclosed in square brackets - * Supported commands in subsearches: All PPL commands are supported (``where``, ``eval``, ``fields``, ``head``, ``rename``, ``stats``, ``sort``, ``dedup``, etc.) - -* result-processing: optional. Commands applied to the merged results. - - * Description: After the multisearch operation, you can apply any PPL command to process the combined results, such as ``stats``, ``sort``, ``head``, etc. +multisearch ... -Limitations -=========== - -* **Minimum Subsearches**: At least two subsearches must be specified -* **Schema Compatibility**: When fields with the same name exist across subsearches but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type across all subsearches, or use different field names (e.g., by renaming with ``eval`` or using ``fields`` to select non-conflicting columns). +* subsearch1, subsearch2, ...: mandatory. At least two subsearches required. Each subsearch must be enclosed in square brackets and start with the ``search`` keyword. Format: ``[search source=index | commands...]``. All PPL commands are supported within subsearches. +* result-processing: optional. Commands applied to the merged results after the multisearch operation, such as ``stats``, ``sort``, ``head``, etc. Usage ===== @@ -69,7 +48,7 @@ Basic multisearch:: Example 1: Basic Age Group Analysis =================================== -Combine young and adult customers into a single result set for further analysis. +This example combines young and adult customers into a single result set for further analysis. PPL query:: @@ -87,7 +66,7 @@ PPL query:: Example 2: Success Rate Pattern =============================== -Combine high-balance and all valid accounts for comparison analysis. +This example combines high-balance and all valid accounts for comparison analysis. 
PPL query:: @@ -103,9 +82,9 @@ PPL query:: +-----------+---------+--------------+ Example 3: Timestamp Interleaving -================================== +================================= -Combine time-series data from multiple sources with automatic timestamp-based ordering. +This example combines time-series data from multiple sources with automatic timestamp-based ordering. PPL query:: @@ -124,7 +103,7 @@ PPL query:: Example 4: Type Compatibility - Missing Fields ================================================= -Demonstrate how missing fields are handled with NULL insertion. +This example demonstrates how missing fields are handled with NULL insertion. PPL query:: @@ -139,3 +118,9 @@ PPL query:: | Hattie | 36 | null | +-----------+-----+------------+ + +Limitations +=========== + +* **Minimum Subsearches**: At least two subsearches must be specified +* **Schema Compatibility**: When fields with the same name exist across subsearches but have incompatible types, the system automatically resolves conflicts by renaming the conflicting fields. The first occurrence retains the original name, while subsequent conflicting fields are renamed with a numeric suffix (e.g., ``age`` becomes ``age0``, ``age1``, etc.). This ensures all data is preserved while maintaining schema consistency. diff --git a/docs/user/ppl/cmd/parse.rst b/docs/user/ppl/cmd/parse.rst index 8e0dc7da080..833736238b9 100644 --- a/docs/user/ppl/cmd/parse.rst +++ b/docs/user/ppl/cmd/parse.rst @@ -1,6 +1,6 @@ -============= +===== parse -============= +===== .. rubric:: Table of contents @@ -10,26 +10,25 @@ parse Description -============ +=========== | The ``parse`` command parses a text field with a regular expression and appends the result to the search result. Syntax -============ +====== parse * field: mandatory. The field must be a text field. -* pattern: mandatory string. The regular expression pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field. +* pattern: mandatory. The regular expression pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field. Regular Expression ================== - The regular expression pattern is used to match the whole text field of each document with Java regex engine. Each named capture group in the expression will become a new ``STRING`` field. Example 1: Create a new field ============================= -The example shows how to create a new field ``host`` for each document. ``host`` will be the host name after ``@`` in ``email`` field. Parsing a null field will return an empty string. +This example shows how to create a new field ``host`` for each document. ``host`` will be the host name after ``@`` in ``email`` field. Parsing a null field will return an empty string. PPL query:: @@ -48,7 +47,7 @@ PPL query:: Example 2: Override an existing field ===================================== -The example shows how to override the existing ``address`` field with street number removed. +This example shows how to override the existing ``address`` field with street number removed. PPL query:: @@ -66,7 +65,7 @@ PPL query:: Example 3: Filter and sort by casted parsed field ================================================= -The example shows how to sort street numbers that are higher than 500 in ``address`` field. +This example shows how to sort street numbers that are higher than 500 in ``address`` field. 
PPL query:: diff --git a/docs/user/ppl/cmd/patterns.rst b/docs/user/ppl/cmd/patterns.rst index c3a785ce274..ec87aca7494 100644 --- a/docs/user/ppl/cmd/patterns.rst +++ b/docs/user/ppl/cmd/patterns.rst @@ -1,6 +1,6 @@ -============= +======== patterns -============= +======== .. rubric:: Table of contents @@ -10,39 +10,39 @@ patterns Description -============ -* The ``patterns`` command extracts log patterns from a text field and appends the results to the search result. Grouping logs by their patterns makes it easier to aggregate stats from large volumes of log data for analysis and troubleshooting. -* ``patterns`` command now allows users to select different log parsing algorithms to get high log pattern grouping accuracy. Two pattern methods are supported, aka ``simple_pattern`` and ``brain``. -* ``simple_pattern`` algorithm is basically a regex parsing method vs ``brain`` algorithm is an automatic log grouping algorithm with high grouping accuracy and keeps semantic meaning. -(From 3.1.0) +=========== +| The ``patterns`` command extracts log patterns from a text field and appends the results to the search result. Grouping logs by their patterns makes it easier to aggregate stats from large volumes of log data for analysis and troubleshooting. -* ``patterns`` command supports two modes, aka ``label`` and ``aggregation``. ``label`` mode is similar to previous 3.0.0 output. ``aggregation`` mode returns aggregated results on target field. -* V2 Engine engine still have the same output in ``label`` mode as before. In ``aggregation`` mode, it returns aggregated pattern count on labeled pattern as well as sample logs (sample count is configurable) per pattern. -* Calcite engine by default labels the variables with '<*>' placeholder. -* If ``show_numbered_token`` option is turned on, Calcite engine's ``label`` mode not only labels pattern of text but also labels variable tokens in map. In ``aggregation`` mode, it will also output labeled pattern as well as variable tokens per pattern. The variable placeholder is in the format of '' instead of '<*>'. +| The ``patterns`` command allows users to select different log parsing algorithms to achieve high log pattern grouping accuracy. Two pattern methods are supported: ``simple_pattern`` and ``brain``. + +| The ``simple_pattern`` algorithm is a regex-based parsing method, while the ``brain`` algorithm is an automatic log grouping algorithm that achieves high grouping accuracy and preserves semantic meaning. + +| The ``patterns`` command supports two modes: ``label`` and ``aggregation``. ``label`` mode returns individual pattern labels. ``aggregation`` mode returns aggregated results on the target field. + +| The Calcite engine by default labels the variables with the '<*>' placeholder. If the ``show_numbered_token`` option is turned on, the Calcite engine's ``label`` mode not only labels the pattern of the text but also labels the variable tokens in a map. In ``aggregation`` mode, it will also output the labeled pattern as well as the variable tokens per pattern. The variable placeholder is in the format of '' instead of '<*>'. Syntax -============ +====== patterns [by byClause...] [method=simple_pattern | brain] [mode=label | aggregation] [max_sample_count=integer] [buffer_limit=integer] [show_numbered_token=boolean] [new_field=] (algorithm parameters...) -* field: mandatory. The text(string) field to analyze for patterns. +* field: mandatory. The text field to analyze for patterns. * byClause: optional. Fields or scalar functions used to group logs for labeling/aggregation. -* method: optional. 
Algorithm choice: ``simple_pattern`` (default) or ``brain``. The method is configured by the setting ``plugins.ppl.pattern.method``. -* mode: optional. Output mode: ``label`` (default) or ``aggregation``. The mode is configured by the setting ``plugins.ppl.pattern.mode``. -* max_sample_count: optional. Max sample logs returned per pattern in aggregation mode (default: 10). The max_sample_count is configured by the setting ``plugins.ppl.pattern.max.sample.count``. -* buffer_limit: optional. Safeguard parameter for ``brain`` algorithm to limit internal temporary buffer size (default: 100,000, min: 50,000). The buffer_limit is configured by the setting ``plugins.ppl.pattern.buffer.limit``. -* show_numbered_token: optional. The flag to turn on numbered token output format (default: false). The show_numbered_token is configured by the setting ``plugins.ppl.pattern.show.numbered.token``. -* new_field: Alias of the output pattern field. (default: "patterns_field"). +* method: optional. Algorithm choice: ``simple_pattern`` or ``brain``. **Default:** ``simple_pattern``. +* mode: optional. Output mode: ``label`` or ``aggregation``. **Default:** ``label``. +* max_sample_count: optional. Max sample logs returned per pattern in aggregation mode. **Default:** 10. +* buffer_limit: optional. Safeguard parameter for ``brain`` algorithm to limit internal temporary buffer size (min: 50,000). **Default:** 100,000. +* show_numbered_token: optional. The flag to turn on numbered token output format. **Default:** false. +* new_field: optional. Alias of the output pattern field. **Default:** "patterns_field". * algorithm parameters: optional. Algorithm-specific tuning: - - ``simple_pattern`` : Define regex via "pattern". - - ``brain`` : Adjust sensitivity with variable_count_threshold (int > 0) and frequency_threshold_percentage (double 0.0 - 1.0). + * ``simple_pattern``: Define regex via "pattern". + * ``brain``: Adjust sensitivity with variable_count_threshold and frequency_threshold_percentage. - - ``variable_count_threshold``: Optional integer(Default value is 5). Words(or we say tokens) are split by space. Algorithm will count how many distinct words are at specific position in initial log groups. Same log group's constant word ideally should be distinct at its position but it's not guaranteed because some words could be enums. Adjusting this threshold can primarily determine the sensitivity of constant words. - - ``frequency_threshold_percentage``: Optional double(Default value is 0.3). Brain's log pattern is selected based on longest word combination. A word combination is words with same frequency per message. To select longest word combination frequency, it needs a lower bound of frequency to ignore too low frequency words. The representative frequency of longest word combination should be >= highest token frequency of log * threshold percentage. Adjusting this threshold could prune some low frequency words. + * ``variable_count_threshold``: optional integer. Words are split by space. Algorithm counts how many distinct words are at specific position in initial log groups. Adjusting this threshold can determine the sensitivity of constant words. **Default:** 5. + * ``frequency_threshold_percentage``: optional double. Brain's log pattern is selected based on longest word combination. This sets the lower bound of frequency to ignore low frequency words. **Default:** 0.3. 
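+ +A sketch of tuning ``brain`` sensitivity (the ``apache`` index, ``message`` field, and threshold values are illustrative; see the Brain examples below for runnable queries):: + +    source=apache | patterns message method=brain variable_count_threshold=2 frequency_threshold_percentage=0.2 +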
-Change default pattern method -============ +Change the default pattern method +================================= To override default pattern parameters, users can run following command .. code-block:: @@ -59,9 +59,9 @@ To override default pattern parameters, users can run following command } Simple Pattern Example 1: Create the new field -=============================== +============================================== -The example shows how to extract patterns in ``email`` for each document. Parsing a null field will return an empty string. +This example shows how to extract patterns in ``email`` for each document. Parsing a null field will return an empty string. PPL query:: @@ -77,9 +77,9 @@ PPL query:: +-----------------------+----------------+ Simple Pattern Example 2: Extract log patterns -=============================== +============================================== -The example shows how to extract patterns from a raw log field using the default patterns. +This example shows how to extract patterns from a raw log field using the default patterns. PPL query:: @@ -95,9 +95,9 @@ PPL query:: +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ Simple Pattern Example 3: Extract log patterns with custom regex pattern -========================================================= +======================================================================== -The example shows how to extract patterns from a raw log field using user defined patterns. +This example shows how to extract patterns from a raw log field using user defined patterns. PPL query:: @@ -113,9 +113,9 @@ PPL query:: +-----------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Simple Pattern Example 4: Return log patterns aggregation result -========================================================= +================================================================ -Starting 3.1.0, patterns command support aggregation mode. The example shows how to get aggregated results from a raw log field. +This example shows how to get aggregated results from a raw log field. PPL query:: @@ -131,13 +131,13 @@ PPL query:: +---------------------------------------------------------------------------------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------+ Simple Pattern Example 5: Return log patterns aggregation result with detected variable tokens -========================================================= +============================================================================================== -Starting 3.1.0, patterns command support aggregation mode. +This example shows how to get aggregated results with detected variable tokens. Configuration ------------- -With Calcite specific option ``show_numbered_token`` enabled, the output can detect numbered variable tokens from the pattern field. +With option ``show_numbered_token`` enabled, the output can detect numbered variable tokens from the pattern field. 
PPL query:: @@ -150,9 +150,9 @@ PPL query:: +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Brain Example 1: Extract log patterns -=============================== +===================================== -The example shows how to extract semantic meaningful log patterns from a raw log field using the brain algorithm. The default variable count threshold is 5. +This example shows how to extract semantic meaningful log patterns from a raw log field using the brain algorithm. The default variable count threshold is 5. PPL query:: @@ -168,9 +168,9 @@ PPL query:: +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+ Brain Example 2: Extract log patterns with custom parameters -=============================== +============================================================ -The example shows how to extract semantic meaningful log patterns from a raw log field using defined parameter of brain algorithm. +This example shows how to extract semantic meaningful log patterns from a raw log field using custom parameters of the brain algorithm. PPL query:: @@ -186,9 +186,9 @@ PPL query:: +-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------+ Brain Example 3: Return log patterns aggregation result -=============================== +======================================================= -Starting 3.1.0, patterns command support aggregation mode. +This example shows how to get aggregated results from a raw log field using the brain algorithm. PPL query:: @@ -201,13 +201,13 @@ PPL query:: +----------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Brain Example 4: Return log patterns aggregation result with detected variable tokens -========================================================= +===================================================================================== -Starting 3.1.0, patterns command support aggregation mode. +This example shows how to get aggregated results with detected variable tokens using the brain algorithm. Configuration ------------- -With Calcite specific option ``show_numbered_token`` enabled, the output can detect numbered variable tokens from the pattern field. +With option ``show_numbered_token`` enabled, the output can detect numbered variable tokens from the pattern field. 
PPL query:: @@ -220,6 +220,6 @@ PPL query:: +----------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Limitations -========== +=========== - Patterns command is not pushed down to OpenSearch data node for now. It will only group log patterns on log messages returned to coordinator node. diff --git a/docs/user/ppl/cmd/rare.rst b/docs/user/ppl/cmd/rare.rst index d16dc4878dd..e72c8c8c2c9 100644 --- a/docs/user/ppl/cmd/rare.rst +++ b/docs/user/ppl/cmd/rare.rst @@ -11,30 +11,28 @@ rare Description =========== -| Using ``rare`` command to find the least common tuple of values of all fields in the field list. +| The ``rare`` command finds the least common tuple of values of all fields in the field list. -**Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields. +| **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields. Syntax ====== -rare [by-clause] +rare [rare-options] [by-clause] -rare [rare-options] [by-clause] ``(available from 3.1.0+)`` +* field-list: mandatory. Comma-delimited list of field names. +* by-clause: optional. One or more fields to group the results by. +* rare-options: optional. Options for the rare command. Supported syntax is [countfield=] [showcount=]. +* showcount=: optional. Whether to create a field in output that represent a count of the tuple of values. **Default:** ``true``. +* countfield=: optional. The name of the field that contains count. **Default:** ``'count'``. +* usenull=: optional. whether to output the null value. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``: -* field-list: mandatory. comma-delimited list of field names. -* by-clause: optional. one or more fields to group the results by. -* rare-options: optional. options for the rare command. Supported syntax is [countfield=] [showcount=]. -* showcount=: optional. whether to create a field in output that represent a count of the tuple of values. Default value is ``true``. -* countfield=: optional. the name of the field that contains count. Default value is ``'count'``. -* usenull=: optional (since 3.4.0). whether to output the null value. The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``: - - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` Example 1: Find the least common values in a field ================================================== -The example finds least common gender of all the accounts. +This example shows how to find the least common gender of all the accounts. 
PPL query:: @@ -51,7 +49,7 @@ PPL query:: Example 2: Find the least common values organized by gender =========================================================== -The example finds least common age of all the accounts group by gender. +This example shows how to find the least common age of all the accounts grouped by gender. PPL query:: @@ -66,8 +64,10 @@ PPL query:: | M | 36 | +--------+-----+ -Example 3: Rare command with Calcite enabled -============================================ +Example 3: Rare command +======================= + +This example shows how to find the least common gender of all the accounts. PPL query:: @@ -84,6 +84,8 @@ PPL query:: Example 4: Specify the count field option ========================================= +This example shows how to specify the count field. + PPL query:: os> source=accounts | rare countfield='cnt' gender; diff --git a/docs/user/ppl/cmd/regex.rst b/docs/user/ppl/cmd/regex.rst index 307aa0129d1..154949ba133 100644 --- a/docs/user/ppl/cmd/regex.rst +++ b/docs/user/ppl/cmd/regex.rst @@ -1,6 +1,6 @@ -============= +===== regex -============= +===== .. rubric:: Table of contents @@ -10,15 +10,11 @@ regex Description -============ +=========== | The ``regex`` command filters search results by matching field values against a regular expression pattern. Only documents where the specified field matches the pattern are included in the results. -Version -======= -3.3.0 - Syntax -============ +====== regex = regex != @@ -28,7 +24,7 @@ regex != * != : operator for negative matching (exclude matches) Regular Expression Engine -========================== +========================= The regex command uses Java's built-in regular expression engine, which supports: @@ -42,7 +38,7 @@ For complete documentation of Java regex patterns and available modes, see the ` Example 1: Basic pattern matching ================================= -The example shows how to filter documents where the ``lastname`` field matches names starting with uppercase letters. +This example shows how to filter documents where the ``lastname`` field matches names starting with uppercase letters. PPL query:: @@ -61,7 +57,7 @@ PPL query:: Example 2: Negative matching ============================ -The example shows how to exclude documents where the ``lastname`` field ends with "son". +This example shows how to exclude documents where the ``lastname`` field ends with "son". PPL query:: @@ -80,7 +76,7 @@ PPL query:: Example 3: Email domain matching ================================ -The example shows how to filter documents by email domain patterns. +This example shows how to filter documents by email domain patterns. PPL query:: @@ -96,7 +92,7 @@ PPL query:: Example 4: Complex patterns with character classes ================================================== -The example shows how to use complex regex patterns with character classes and quantifiers. +This example shows how to use complex regex patterns with character classes and quantifiers. PPL query:: @@ -115,7 +111,7 @@ PPL query:: Example 5: Case-sensitive matching ================================== -The example demonstrates that regex matching is case-sensitive by default. +This example demonstrates that regex matching is case-sensitive by default. PPL query:: @@ -140,5 +136,5 @@ PPL query:: Limitations =========== -* **Field specification required**: A field name must be specified in the regex command. 
Pattern-only syntax (e.g., ``regex "pattern"``) is not currently supported -* **String fields only**: The regex command currently only supports string fields. Using it on numeric or boolean fields will result in an error +* **Field specification required**: A field name must be specified in the regex command. Pattern-only syntax (e.g., ``regex "pattern"``) is not currently supported +* **String fields only**: The regex command currently only supports string fields. Using it on numeric or boolean fields will result in an error diff --git a/docs/user/ppl/cmd/rename.rst b/docs/user/ppl/cmd/rename.rst index ed7f806aad1..eb92a45b8cb 100644 --- a/docs/user/ppl/cmd/rename.rst +++ b/docs/user/ppl/cmd/rename.rst @@ -1,6 +1,6 @@ -============= +====== rename -============= +====== .. rubric:: Table of contents @@ -10,19 +10,18 @@ rename Description -============ -| Using ``rename`` command to rename one or more fields in the search result. - +=========== +| The ``rename`` command renames one or more fields in the search result. Syntax -============ +====== rename AS ["," AS ]... -* source-field: mandatory. The name of the field you want to rename. Supports wildcard patterns since version 3.3 using ``*``. +* source-field: mandatory. The name of the field you want to rename. Supports wildcard patterns using ``*``. * target-field: mandatory. The name you want to rename to. Must have same number of wildcards as the source. -Field Rename Behavior (Since version 3.3) -========================================== +Behavior +======== The rename command handles non-existent fields as follows: @@ -30,17 +29,10 @@ The rename command handles non-existent fields as follows: * **Renaming a non-existent field to an existing field**: The existing target field is removed from the result set. * **Renaming an existing field to an existing field**: The existing target field is removed and the source field is renamed to the target. - -**Notes:** - -* Literal asterisk (*) characters in field names cannot be replaced as asterisk is used for wildcard matching. -* Wildcards are only supported when the Calcite query engine is enabled. - - Example 1: Rename one field =========================== -The example show rename one field. +This example shows how to rename one field. PPL query:: @@ -59,7 +51,7 @@ PPL query:: Example 2: Rename multiple fields ================================= -The example show rename multiple fields. +This example shows how to rename multiple fields. PPL query:: @@ -76,9 +68,9 @@ PPL query:: Example 3: Rename with wildcards -================================= +================================ -The example shows renaming multiple fields using wildcard patterns. (Requires Calcite query engine) +This example shows how to rename multiple fields using wildcard patterns. PPL query:: @@ -95,9 +87,9 @@ PPL query:: Example 4: Rename with multiple wildcard patterns -================================================== +================================================= -The example shows renaming multiple fields using multiple wildcard patterns. (Requires Calcite query engine) +This example shows how to rename multiple fields using multiple wildcard patterns. PPL query:: @@ -113,9 +105,9 @@ PPL query:: +------------+-----------+---------------+ Example 5: Rename existing field to existing field -==================================== +================================================== -The example shows renaming an existing field to an existing field. 
The target field gets removed and the source field is renamed to the target field. +This example shows how to rename an existing field to an existing field. The target field gets removed and the source field is renamed to the target field. PPL query:: @@ -134,4 +126,5 @@ PPL query:: Limitations =========== -The ``rename`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. +| The ``rename`` command is not rewritten to OpenSearch DSL; it is only executed on the coordination node. +| Literal asterisk (*) characters in field names cannot be replaced as asterisk is used for wildcard matching. diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst index 0098124344d..60a28bc8ce0 100644 --- a/docs/user/ppl/cmd/replace.rst +++ b/docs/user/ppl/cmd/replace.rst @@ -1,6 +1,6 @@ -============= +======= replace -============= +======= .. rubric:: Table of contents @@ -10,24 +10,22 @@ replace Description -============ -Using ``replace`` command to replace text in one or more fields. Supports literal string replacement and wildcard patterns using ``*``. - -Note: This command is only available when Calcite engine is enabled. +=========== +The ``replace`` command replaces text in one or more fields in the search result. Supports literal string replacement and wildcard patterns using ``*``. Syntax -============ +====== replace '' WITH '' [, '' WITH '']... IN [, ]... - -Examples -======== +* pattern: mandatory. The text pattern you want to replace. +* replacement: mandatory. The text you want to replace with. +* field-name: mandatory. One or more field names where the replacement should occur. Example 1: Replace text in one field ------------------------------------- +==================================== -The example shows replacing text in one field. +This example shows replacing text in one field. PPL query:: @@ -44,9 +42,9 @@ PPL query:: Example 2: Replace text in multiple fields ------------------------------------- +========================================== -The example shows replacing text in multiple fields. +This example shows replacing text in multiple fields. PPL query:: @@ -63,9 +61,9 @@ PPL query:: Example 3: Replace with other commands in a pipeline ------------------------------------- +==================================================== -The example shows using replace with other commands in a query pipeline. +This example shows using replace with other commands in a query pipeline. PPL query:: @@ -80,9 +78,9 @@ PPL query:: Example 4: Replace with multiple pattern/replacement pairs ------------------------------------- +========================================================== -The example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially. +This example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially. PPL query:: @@ -98,7 +96,7 @@ PPL query:: +-----------+ Example 5: Pattern matching with LIKE and replace ------------------------------------- +================================================= Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. diff --git a/docs/user/ppl/cmd/reverse.rst b/docs/user/ppl/cmd/reverse.rst index 2efe833855f..d839a687bf9 100644 --- a/docs/user/ppl/cmd/reverse.rst +++ b/docs/user/ppl/cmd/reverse.rst @@ -1,6 +1,6 @@ -============= +======= reverse -============= +======= ..
rubric:: Table of contents @@ -10,28 +10,23 @@ reverse Description -============ -| Using ``reverse`` command to reverse the display order of search results. The same results are returned, but in reverse order. - -Version -======= -3.2.0 +=========== +| The ``reverse`` command reverses the display order of search results. The same results are returned, but in reverse order. Syntax -============ +====== reverse - * No parameters: The reverse command takes no arguments or options. Note -===== -The `reverse` command processes the entire dataset. If applied directly to millions of records, it will consume significant memory resources on the coordinating node. Users should only apply the `reverse` command to smaller datasets, typically after aggregation operations. +==== +| The `reverse` command processes the entire dataset. If applied directly to millions of records, it will consume significant memory resources on the coordinating node. Users should only apply the `reverse` command to smaller datasets, typically after aggregation operations. Example 1: Basic reverse operation ================================== -The example shows reversing the order of all documents. +This example shows reversing the order of all documents. PPL query:: @@ -50,7 +45,7 @@ PPL query:: Example 2: Reverse with sort ============================ -The example shows reversing results after sorting by age in ascending order, effectively giving descending order. +This example shows reversing results after sorting by age in ascending order, effectively giving descending order. PPL query:: @@ -69,7 +64,7 @@ PPL query:: Example 3: Reverse with head ============================ -The example shows using reverse with head to get the last 2 records from the original order. +This example shows using reverse with head to get the last 2 records from the original order. PPL query:: @@ -86,7 +81,7 @@ PPL query:: Example 4: Double reverse ========================= -The example shows that applying reverse twice returns to the original order. +This example shows that applying reverse twice returns to the original order. PPL query:: @@ -103,9 +98,9 @@ PPL query:: Example 5: Reverse with complex pipeline -======================================= +======================================== -The example shows reverse working with filtering and field selection. +This example shows reverse working with filtering and field selection. PPL query:: diff --git a/docs/user/ppl/cmd/rex.rst b/docs/user/ppl/cmd/rex.rst index 28839247194..3dec7f26c42 100644 --- a/docs/user/ppl/cmd/rex.rst +++ b/docs/user/ppl/cmd/rex.rst @@ -1,6 +1,6 @@ -============= +=== rex -============= +=== .. rubric:: Table of contents @@ -10,37 +10,33 @@ rex Description -============ +=========== | The ``rex`` command extracts fields from a raw text field using regular expression named capture groups. -Version -======= -3.3.0 - Syntax -============ +====== rex [mode=] field= [max_match=] [offset_field=] * field: mandatory. The field must be a string field to extract data from. * pattern: mandatory string. The regular expression pattern with named capture groups used to extract new fields. Pattern must contain at least one named capture group using ``(?pattern)`` syntax. -* mode: optional. Either ``extract`` (default) or ``sed``. +* mode: optional. Either ``extract`` or ``sed``. **Default:** extract - - **extract mode** (default): Creates new fields from regular expression named capture groups. This is the standard field extraction behavior. 
- - **sed mode**: Performs text substitution on the field using sed-style patterns: + * **extract mode** (default): Creates new fields from regular expression named capture groups. This is the standard field extraction behavior. + * **sed mode**: Performs text substitution on the field using sed-style patterns - - ``s/pattern/replacement/`` - Replace first occurrence - - ``s/pattern/replacement/g`` - Replace all occurrences (global) - - ``s/pattern/replacement/n`` - Replace only the nth occurrence (where n is a number) - - ``y/from_chars/to_chars/`` - Character-by-character transliteration - - Backreferences: ``\1``, ``\2``, etc. reference captured groups in replacement + * ``s/pattern/replacement/`` - Replace first occurrence + * ``s/pattern/replacement/g`` - Replace all occurrences (global) + * ``s/pattern/replacement/n`` - Replace only the nth occurrence (where n is a number) + * ``y/from_chars/to_chars/`` - Character-by-character transliteration + * Backreferences: ``\1``, ``\2``, etc. reference captured groups in replacement * max_match: optional integer (default=1). Maximum number of matches to extract. If greater than 1, extracted fields become arrays. The value 0 means unlimited matches, but is automatically capped to the configured limit (default: 10, configurable via ``plugins.ppl.rex.max_match.limit``). * offset_field: optional string. Field name to store the character offset positions of matches. Only available in extract mode. Example 1: Basic Field Extraction -================================== +================================= -Extract username and domain from email addresses using named capture groups. Both extracted fields are returned as string type. +This example shows extracting username and domain from email addresses using named capture groups. Both extracted fields are returned as string type. PPL query:: @@ -55,9 +51,9 @@ PPL query:: Example 2: Handling Non-matching Patterns -========================================== +========================================= -The rex command returns all events, setting extracted fields to null for non-matching patterns. Extracted fields would be string type when matches are found. +This example shows the rex command returning all events, setting extracted fields to null for non-matching patterns. Extracted fields would be string type when matches are found. PPL query:: @@ -72,9 +68,9 @@ PPL query:: Example 3: Multiple Matches with max_match -=========================================== +========================================== -Extract multiple words from address field using max_match parameter. The extracted field is returned as an array type containing string elements. +This example shows extracting multiple words from address field using max_match parameter. The extracted field is returned as an array type containing string elements. PPL query:: @@ -90,9 +86,9 @@ PPL query:: Example 4: Text Replacement with mode=sed -========================================== +========================================= -Replace email domains using sed mode for text substitution. The extracted field is returned as string type. +This example shows replacing email domains using sed mode for text substitution. The extracted field is returned as string type. PPL query:: @@ -107,9 +103,9 @@ PPL query:: Example 5: Using offset_field -============================== +============================= -Track the character positions where matches occur. Extracted fields are string type, and the offset_field is also string type. 
+This example shows tracking the character positions where matches occur. Extracted fields are string type, and the offset_field is also string type. PPL query:: @@ -124,9 +120,9 @@ PPL query:: Example 6: Complex Email Pattern -================================= +================================ -Extract comprehensive email components including top-level domain. All extracted fields are returned as string type. +This example shows extracting comprehensive email components including top-level domain. All extracted fields are returned as string type. PPL query:: @@ -141,9 +137,9 @@ PPL query:: Example 7: Chaining Multiple rex Commands -========================================== +========================================= -Extract initial letters from both first and last names. All extracted fields are returned as string type. +This example shows extracting initial letters from both first and last names. All extracted fields are returned as string type. PPL query:: @@ -159,9 +155,9 @@ PPL query:: Example 8: Named Capture Group Limitations -============================================ +========================================== -Demonstrates naming restrictions for capture groups. Group names cannot contain underscores due to Java regex limitations. +This example demonstrates naming restrictions for capture groups. Group names cannot contain underscores due to Java regex limitations. Invalid PPL query with underscores:: @@ -182,9 +178,9 @@ Correct PPL query without underscores:: Example 9: Max Match Limit Protection -====================================== +===================================== -Demonstrates the max_match limit protection mechanism. When max_match=0 (unlimited) is specified, the system automatically caps it to prevent memory exhaustion. +This example demonstrates the max_match limit protection mechanism. When max_match=0 (unlimited) is specified, the system automatically caps it to prevent memory exhaustion. PPL query with max_match=0 automatically capped to default limit of 10:: @@ -221,22 +217,19 @@ Special Characters in Group Names No No Limitations =========== - -There are several important limitations with the rex command: - **Named Capture Group Naming:** -- Group names must start with a letter and contain only letters and digits -- For detailed Java regex pattern syntax and usage, refer to the `official Java Pattern documentation `_ +* Group names must start with a letter and contain only letters and digits +* For detailed Java regex pattern syntax and usage, refer to the `official Java Pattern documentation `_ **Pattern Requirements:** -- Pattern must contain at least one named capture group -- Regular capture groups ``(...)`` without names are not allowed +* Pattern must contain at least one named capture group +* Regular capture groups ``(...)`` without names are not allowed **Max Match Limit:** - -- The ``max_match`` parameter is subject to a configurable system limit to prevent memory exhaustion -- When ``max_match=0`` (unlimited) is specified, it is automatically capped at the configured limit (default: 10) -- User-specified values exceeding the configured limit will result in an error -- Users can adjust the limit via the ``plugins.ppl.rex.max_match.limit`` cluster setting. 
Setting this limit to a large value is not recommended as it can lead to excessive memory consumption, especially with patterns that match empty strings (e.g., ``\d*``, ``\w*``) \ No newline at end of file + +* The ``max_match`` parameter is subject to a configurable system limit to prevent memory exhaustion +* When ``max_match=0`` (unlimited) is specified, it is automatically capped at the configured limit (default: 10) +* User-specified values exceeding the configured limit will result in an error +* Users can adjust the limit via the ``plugins.ppl.rex.max_match.limit`` cluster setting. Setting this limit to a large value is not recommended as it can lead to excessive memory consumption, especially with patterns that match empty strings (e.g., ``\d*``, ``\w*``) \ No newline at end of file diff --git a/docs/user/ppl/cmd/search.rst b/docs/user/ppl/cmd/search.rst index 11b6bf99df4..31aa28cc46d 100644 --- a/docs/user/ppl/cmd/search.rst +++ b/docs/user/ppl/cmd/search.rst @@ -1,6 +1,6 @@ -============= +====== search -============= +====== .. rubric:: Table of contents @@ -10,12 +10,12 @@ search Description -============ -| Using ``search`` command to retrieve document from the index. ``search`` command could be only used as the first command in the PPL query. +=========== +| The ``search`` command retrieves documents from the index. The ``search`` command can only be used as the first command in the PPL query. Syntax -============ +====== search source=[:] [search-expression] * search: search keyword, which could be ignored. @@ -88,7 +88,7 @@ You can check or modify the default field setting:: } Field Types and Search Behavior -================================ +=============================== **Text Fields**: Full-text search, phrase search @@ -135,11 +135,8 @@ Cross-Cluster Search ==================== Cross-cluster search lets any node in a cluster execute search requests against other clusters. Refer to `Cross-Cluster Search `_ for configuration. -Examples -======== - Example 1: Text Search ------------------------------------ +====================== **Basic Text Search** (unquoted single term):: @@ -194,7 +191,7 @@ Note: ``search user email`` is equivalent to ``search user AND email``. Multiple +----------------------------------------------------------------------------------------------------------+ Example 2: Boolean Logic and Operator Precedence -------------------------------------------------- +================================================= **Boolean Operators**:: @@ -230,7 +227,7 @@ Example 2: Boolean Logic and Operator Precedence The above evaluates as ``(severityText="ERROR" OR severityText="WARN") AND severityNumber>15`` Example 3: NOT vs != Semantics -------------------------------- +============================== **!= operator** (field must exist and not equal the value):: @@ -260,7 +257,7 @@ Example 3: NOT vs != Semantics Dale Adams (account 18) has ``employer=null``. He appears in ``NOT employer="Quility"`` but not in ``employer!="Quility"``. Example 4: Wildcards --------------------- +==================== **Wildcard Patterns**:: @@ -302,7 +299,7 @@ Example 4: Wildcards Example 5: Range Queries -------------------------- +======================== Use comparison operators (>, <, >=, <=) to filter numeric and date fields within specific ranges. Range queries are particularly useful for filtering by age, price, timestamps, or any numeric metrics.
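For instance, a minimal sketch of such a range query against the ``accounts`` index used throughout these docs (the age bounds are illustrative and the result table is omitted)::

    source=accounts age>=30 AND age<36 | fields firstname, age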
@@ -327,7 +324,7 @@ Use comparison operators (>, <, >=, <=) to filter numeric and date fields within +---------------------------------------------------------+ Example 6: Field Search with Wildcards ---------------------------------------- +====================================== When searching in text or keyword fields, wildcards enable partial matching. This is particularly useful for finding records where you only know part of the value. Note that wildcards work best with keyword fields, while text fields may produce unexpected results due to tokenization. @@ -359,7 +356,7 @@ When searching in text or keyword fields, wildcards enable partial matching. Thi * **Case sensitivity**: Keyword field wildcards are case-sensitive unless normalized during indexing Example 7: IN Operator and Field Comparisons ---------------------------------------------- +============================================ The IN operator efficiently checks if a field matches any value from a list. This is cleaner and more performant than chaining multiple OR conditions for the same field. @@ -394,7 +391,7 @@ The IN operator efficiently checks if a field matches any value from a list. Thi +---------------------------------------------------------+ Example 8: Complex Expressions -------------------------------- +============================== Combine multiple conditions using boolean operators and parentheses to create sophisticated search queries. @@ -419,7 +416,7 @@ Combine multiple conditions using boolean operators and parentheses to create so +---------------------------------------------------------+ Example 9: Time Modifiers --------------------------- +========================= Time modifiers filter search results by time range using the implicit ``@timestamp`` field. They support various time formats for precise temporal filtering. @@ -476,7 +473,7 @@ Time modifiers filter search results by time range using the implicit ``@timesta +-------------------------------+--------------+ Example 10: Special Characters and Escaping -------------------------------------------- +=========================================== Understand when and how to escape special characters in your search queries. There are two categories of characters that need escaping: @@ -541,7 +538,7 @@ Note: Each backslash in the search value needs to be escaped with another backsl +--------------------------------------------------------------------------------------------------------------------------------------------------------+ Example 11: Fetch All Data ----------------------------- +========================== Retrieve all documents from an index by specifying only the source without any search conditions. This is useful for exploring small datasets or verifying data ingestion. diff --git a/docs/user/ppl/cmd/showdatasources.rst b/docs/user/ppl/cmd/showdatasources.rst index f12622f54da..9d0794bb3aa 100644 --- a/docs/user/ppl/cmd/showdatasources.rst +++ b/docs/user/ppl/cmd/showdatasources.rst @@ -10,19 +10,17 @@ show datasources Description -============ -| Using ``show datasources`` command to query datasources configured in the PPL engine. ``show datasources`` command could be only used as the first command in the PPL query. - +=========== +| Use the ``show datasources`` command to query datasources configured in the PPL engine. The ``show datasources`` command can only be used as the first command in the PPL query. 
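Because the command's output behaves like an ordinary search result, it can be piped into further commands. A minimal sketch (assuming the catalog exposes ``DATASOURCE_NAME`` and ``CONNECTOR_TYPE`` columns, as in the example below; result table omitted)::

    show datasources | where CONNECTOR_TYPE='PROMETHEUS' | fields DATASOURCE_NAME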
Syntax -============ +====== show datasources - Example 1: Fetch all PROMETHEUS datasources =========================================== -The example fetches all the datasources of type prometheus. +This example shows fetching all the datasources of type prometheus. PPL query for all PROMETHEUS DATASOURCES:: @@ -38,4 +36,3 @@ PPL query for all PROMETHEUS DATASOURCES:: Limitations =========== The ``show datasources`` command can only work with ``plugins.calcite.enabled=false``. -It means ``show datasources`` command cannot work together with new PPL commands/functions introduced in 3.0.0 and above. diff --git a/docs/user/ppl/cmd/sort.rst b/docs/user/ppl/cmd/sort.rst index e02a8fdae8d..929a2b313b4 100644 --- a/docs/user/ppl/cmd/sort.rst +++ b/docs/user/ppl/cmd/sort.rst @@ -1,6 +1,6 @@ -============= +==== sort -============= +==== .. rubric:: Table of contents @@ -10,16 +10,15 @@ sort Description -============ -| Using ``sort`` command to sorts all the search result by the specified fields. - +=========== +| The ``sort`` command sorts all the search results by the specified fields. Syntax ============ sort [count] <[+|-] sort-field | sort-field [asc|a|desc|d]>... -* count (Since 3.3): optional. The number of results to return. **Default:** returns all results. Specifying a count of 0 or less than 0 also returns all results. +* count: optional. The number of results to return. Specifying a count of 0 or less than 0 returns all results. **Default:** 0. * [+|-]: optional. The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. * [asc|a|desc|d]: optional. asc/a stands for ascending order and NULL/MISSING first. desc/d stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. * sort-field: mandatory. The field used to sort. Can use ``auto(field)``, ``str(field)``, ``ip(field)``, or ``num(field)`` to specify how to interpret field values. @@ -29,9 +28,9 @@ sort [count] <[+|-] sort-field | sort-field [asc|a|desc|d]>... Example 1: Sort by one field -============================= +============================ -The example show sort all the document with age field in ascending order. +This example shows sorting all documents by age field in ascending order. PPL query:: @@ -50,7 +49,7 @@ PPL query:: Example 2: Sort by one field return all the result ================================================== -The example show sort all the document with age field in ascending order. +This example shows sorting all documents by age field in ascending order and returning all results. PPL query:: @@ -69,7 +68,7 @@ PPL query:: Example 3: Sort by one field in descending order (using -) ========================================================== -The example show sort all the document with age field in descending order using the - operator. +This example shows sorting all documents by age field in descending order using the - operator. PPL query:: @@ -87,7 +86,7 @@ PPL query:: Example 4: Sort by one field in descending order (using desc) ============================================================== -The example show sort all the document with age field in descending order using the desc keyword. +This example shows sorting all documents by age field in descending order using the desc keyword.
PPL query:: @@ -105,7 +104,7 @@ PPL query:: Example 5: Sort by multiple fields (using +/-) ============================================== -The example show sort all the document with gender field in ascending order and age field in descending using +/- operators. +This example shows sorting all documents by gender field in ascending order and age field in descending order using +/- operators. PPL query:: @@ -123,7 +122,7 @@ PPL query:: Example 6: Sort by multiple fields (using asc/desc) ==================================================== -The example show sort all the document with gender field in ascending order and age field in descending using asc/desc keywords. +This example shows sorting all documents by gender field in ascending order and age field in descending order using asc/desc keywords. PPL query:: @@ -141,7 +140,7 @@ PPL query:: Example 7: Sort by field include null value =========================================== -The example shows sorting the employer field by the default option (ascending order and null first), the result shows that the null value is in the first row. +This example shows sorting the employer field by the default option (ascending order and null first). The result shows that the null value is in the first row. PPL query:: @@ -159,7 +158,7 @@ PPL query:: Example 8: Specify the number of sorted documents to return ============================================================ -The example shows sorting all the document and returning 2 documents. +This example shows sorting all documents and returning 2 documents. PPL query:: @@ -175,7 +174,7 @@ PPL query:: Example 9: Sort with desc modifier =================================== -The example shows sorting with the desc modifier to reverse sort order. +This example shows sorting with the desc modifier to reverse sort order. PPL query:: @@ -193,7 +192,7 @@ PPL query:: Example 10: Sort with specifying field type ================================== -The example shows sorting with str() to sort numeric values lexicographically. +This example shows sorting with str() to sort numeric values lexicographically. PPL query:: diff --git a/docs/user/ppl/cmd/spath.rst b/docs/user/ppl/cmd/spath.rst index 85ba328c27b..f7a9d034132 100644 --- a/docs/user/ppl/cmd/spath.rst +++ b/docs/user/ppl/cmd/spath.rst @@ -1,6 +1,6 @@ -============= +===== spath -============= +===== .. rubric:: Table of contents @@ -10,20 +10,15 @@ spath Description -============ +=========== | The `spath` command allows extracting fields from structured text data. It currently allows selecting from JSON data with JSON paths. -Version -======= -3.3.0 - Syntax -============ +====== spath input= [output=] [path=] - * input: mandatory. The field to scan for JSON data. -* output: optional. The destination field that the data will be loaded to. Defaults to the value of `path`. +* output: optional. The destination field that the data will be loaded to. **Default:** value of `path`. * path: mandatory. The path of the data to load for the object. For more information on path syntax, see `json_extract <../functions/json.rst#json_extract>`_. Note @@ -33,7 +28,7 @@ The `spath` command currently does not support pushdown behavior for extraction. Example 1: Simple Field Extraction ================================== -The simplest spath is to extract a single field. This extracts `n` from the `doc` field of type `text`. +The simplest spath is to extract a single field. This example extracts `n` from the `doc` field of type `text`.
PPL query:: @@ -48,9 +43,9 @@ PPL query:: +----------+---+ Example 2: Lists & Nesting -============================ +========================== -These queries demonstrate more JSON path uses, like traversing nested fields and extracting list elements. +This example demonstrates more JSON path uses, like traversing nested fields and extracting list elements. PPL query:: @@ -65,9 +60,9 @@ PPL query:: +------------------------------------------------------+---------------+--------------+--------+ Example 3: Sum of inner elements -============================ +================================ -The example shows extracting an inner field and doing statistics on it, using the docs from example 1. It also demonstrates that `spath` always returns strings for inner types. +This example shows extracting an inner field and doing statistics on it, using the docs from example 1. It also demonstrates that `spath` always returns strings for inner types. PPL query:: diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 24b80d4675b..cae65c84c79 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -1,6 +1,6 @@ -============= +===== stats -============= +===== .. rubric:: Table of contents @@ -10,652 +10,68 @@ stats Description -============ -| Using ``stats`` command to calculate the aggregation from search result. - -The following table dataSources the aggregation functions and also indicates how the NULL/MISSING values is handled: - -+----------+-------------+-------------+ -| Function | NULL | MISSING | -+----------+-------------+-------------+ -| COUNT | Not counted | Not counted | -+----------+-------------+-------------+ -| SUM | Ignore | Ignore | -+----------+-------------+-------------+ -| AVG | Ignore | Ignore | -+----------+-------------+-------------+ -| MAX | Ignore | Ignore | -+----------+-------------+-------------+ -| MIN | Ignore | Ignore | -+----------+-------------+-------------+ -| FIRST | Ignore | Ignore | -+----------+-------------+-------------+ -| LAST | Ignore | Ignore | -+----------+-------------+-------------+ -| LIST | Ignore | Ignore | -+----------+-------------+-------------+ -| VALUES | Ignore | Ignore | -+----------+-------------+-------------+ +=========== +| The ``stats`` command calculates the aggregation from the search result. + Syntax -============ +====== stats [bucket_nullable=bool] ... [by-clause] +* aggregation: mandatory. An aggregation function. +* bucket_nullable: optional. Controls whether the stats command includes null buckets in group-by aggregations. When set to ``false``, the aggregation ignores records where the group-by field is null, resulting in faster performance by excluding the null bucket. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. -* aggregation: mandatory. A aggregation function. The argument of aggregation must be field. - -* bucket_nullable: optional (since 3.3.0). Controls whether the stats command includes null buckets in group-by aggregations. When set to ``false``, the aggregation ignores records where the group-by field is null, resulting in faster performance by excluding null bucket. The default value of ``bucket_nullable`` is determined by ``plugins.ppl.syntax.legacy.preferred``: - - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false`` - -* by-clause: optional. - - * Syntax: by [span-expression,] [field,]...
- * Description: The by clause could be the fields and expressions like scalar functions and aggregation functions. Besides, the span clause can be used to split specific field into buckets in the same interval, the stats then does the aggregation by these span buckets. - * Default: If no is specified, the stats command returns only one row, which is the aggregation over the entire result set. - -* span-expression: optional, at most one. - - * Syntax: span([field_expr,] interval_expr) - * Description: The unit of the interval expression is the natural unit by default. If ``field_expr`` is omitted, span will use the implicit ``@timestamp`` field. An error will be thrown if this field doesn't exist. **If the field is a date/time type field, the aggregation results always ignore null bucket**. And the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``. -* Available time unit: - -+----------------------------+ -| Span Interval Units | -+============================+ -| millisecond (ms) | -+----------------------------+ -| second (s) | -+----------------------------+ -| minute (m, case sensitive) | -+----------------------------+ -| hour (h) | -+----------------------------+ -| day (d) | -+----------------------------+ -| week (w) | -+----------------------------+ -| month (M, case sensitive) | -+----------------------------+ -| quarter (q) | -+----------------------------+ -| year (y) | -+----------------------------+ + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false`` -Configuration -============= -Some aggregation functions require Calcite to be enabled for proper functionality. To enable Calcite, use the following command: +* by-clause: optional. Groups results by specified fields or expressions. Syntax: by [span-expression,] [field,]... **Default:** If no by-clause is specified, the stats command returns only one row, which is the aggregation over the entire result set. +* span-expression: optional, at most one. Splits a field into buckets by intervals. Syntax: span(field_expr, interval_expr). The unit of the interval expression is the natural unit by default. If the field is a date/time type field, the aggregation results always ignore the null bucket. For example, ``span(age, 10)`` creates 10-year age buckets and ``span(timestamp, 1h)`` creates hourly buckets. -Enable Calcite:: + * Available time units - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "persistent" : { - "plugins.calcite.enabled" : true - } - }' + * millisecond (ms) + * second (s) + * minute (m, case sensitive) + * hour (h) + * day (d) + * week (w) + * month (M, case sensitive) + * quarter (q) + * year (y) Aggregation Functions ===================== -COUNT ------ - -Description ->>>>>>>>>>> - -Usage: Returns a count of the number of expr in the rows retrieved. The ``C()`` function, ``c``, and ``count`` can be used as abbreviations for ``COUNT()``. To perform a filtered counting, wrap the condition to satisfy in an `eval` expression.
- -Example:: - - os> source=accounts | stats count(), c(), count, c; - fetched rows / total rows = 1/1 - +---------+-----+-------+---+ - | count() | c() | count | c | - |---------+-----+-------+---| - | 4 | 4 | 4 | 4 | - +---------+-----+-------+---+ - -Example of filtered counting:: - - os> source=accounts | stats count(eval(age > 30)) as mature_users; - fetched rows / total rows = 1/1 - +--------------+ - | mature_users | - |--------------| - | 3 | - +--------------+ - -Example of filtered counting with complex conditions:: - - os> source=accounts | stats count(eval(age > 30 and balance > 25000)) as high_value_users; - fetched rows / total rows = 1/1 - +------------------+ - | high_value_users | - |------------------| - | 1 | - +------------------+ - -SUM ---- - -Description ->>>>>>>>>>> - -Usage: SUM(expr). Returns the sum of expr. - -Example:: - - os> source=accounts | stats sum(age) by gender; - fetched rows / total rows = 2/2 - +----------+--------+ - | sum(age) | gender | - |----------+--------| - | 28 | F | - | 101 | M | - +----------+--------+ - -AVG ---- - -Description ->>>>>>>>>>> - -Usage: AVG(expr). Returns the average value of expr. - -Example:: - - os> source=accounts | stats avg(age) by gender; - fetched rows / total rows = 2/2 - +--------------------+--------+ - | avg(age) | gender | - |--------------------+--------| - | 28.0 | F | - | 33.666666666666664 | M | - +--------------------+--------+ - -MAX ---- - -Description ->>>>>>>>>>> - -Usage: MAX(expr). Returns the maximum value of expr. - -For non-numeric fields, values are sorted lexicographically. - -Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0. - -Example:: - - os> source=accounts | stats max(age); - fetched rows / total rows = 1/1 - +----------+ - | max(age) | - |----------| - | 36 | - +----------+ - -Example with text field:: - - os> source=accounts | stats max(firstname); - fetched rows / total rows = 1/1 - +----------------+ - | max(firstname) | - |----------------| - | Nanette | - +----------------+ - -MIN ---- - -Description ->>>>>>>>>>> - -Usage: MIN(expr). Returns the minimum value of expr. - -For non-numeric fields, values are sorted lexicographically. - -Note: Non-numeric field support requires Calcite to be enabled (see `Configuration`_ section above). Available since version 3.3.0. - -Example:: - - os> source=accounts | stats min(age); - fetched rows / total rows = 1/1 - +----------+ - | min(age) | - |----------| - | 28 | - +----------+ - -Example with text field:: - - os> source=accounts | stats min(firstname); - fetched rows / total rows = 1/1 - +----------------+ - | min(firstname) | - |----------------| - | Amber | - +----------------+ - -VAR_SAMP --------- - -Description ->>>>>>>>>>> - -Usage: VAR_SAMP(expr). Returns the sample variance of expr. - -Example:: - - os> source=accounts | stats var_samp(age); - fetched rows / total rows = 1/1 - +--------------------+ - | var_samp(age) | - |--------------------| - | 10.916666666666666 | - +--------------------+ - -VAR_POP -------- - -Description ->>>>>>>>>>> - -Usage: VAR_POP(expr). Returns the population standard variance of expr. - -Example:: - - os> source=accounts | stats var_pop(age); - fetched rows / total rows = 1/1 - +--------------+ - | var_pop(age) | - |--------------| - | 8.1875 | - +--------------+ - -STDDEV_SAMP ------------ - -Description ->>>>>>>>>>> - -Usage: STDDEV_SAMP(expr). Return the sample standard deviation of expr. 
- -Example:: - - os> source=accounts | stats stddev_samp(age); - fetched rows / total rows = 1/1 - +-------------------+ - | stddev_samp(age) | - |-------------------| - | 3.304037933599835 | - +-------------------+ - -STDDEV_POP ----------- - -Description ->>>>>>>>>>> - -Usage: STDDEV_POP(expr). Return the population standard deviation of expr. - -Example:: - - os> source=accounts | stats stddev_pop(age); - fetched rows / total rows = 1/1 - +--------------------+ - | stddev_pop(age) | - |--------------------| - | 2.8613807855648994 | - +--------------------+ - -DISTINCT_COUNT_APPROX ---------------------- - -Description ->>>>>>>>>>> - -Version: 3.1.0 - -Usage: DISTINCT_COUNT_APPROX(expr). Return the approximate distinct count value of the expr, using the hyperloglog++ algorithm. - -Example:: - - PPL> source=accounts | stats distinct_count_approx(gender); - fetched rows / total rows = 1/1 - +-------------------------------+ - | distinct_count_approx(gender) | - |-------------------------------| - | 2 | - +-------------------------------+ - -TAKE ----- - -Description ->>>>>>>>>>> - -Usage: TAKE(field [, size]). Return original values of a field. It does not guarantee on the order of values. - -* field: mandatory. The field must be a text field. -* size: optional integer. The number of values should be returned. Default is 10. - -Example:: - - os> source=accounts | stats take(firstname); - fetched rows / total rows = 1/1 - +-----------------------------+ - | take(firstname) | - |-----------------------------| - | [Amber,Hattie,Nanette,Dale] | - +-----------------------------+ - -PERCENTILE or PERCENTILE_APPROX -------------------------------- - -Description ->>>>>>>>>>> - -Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Return the approximate percentile value of expr at the specified percentage. - -* percent: The number must be a constant between 0 and 100. - -Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref `issue link `_. - -Example:: - - os> source=accounts | stats percentile(age, 90) by gender; - fetched rows / total rows = 2/2 - +---------------------+--------+ - | percentile(age, 90) | gender | - |---------------------+--------| - | 28 | F | - | 36 | M | - +---------------------+--------+ - -Percentile Shortcut Functions ->>>>>>>>>>>>>>>>>>>>>>>>>>>>> - -Version: 3.3.0 - -For convenience, OpenSearch PPL provides shortcut functions for common percentiles: - -- ``PERC(expr)`` - Equivalent to ``PERCENTILE(expr, )`` -- ``P(expr)`` - Equivalent to ``PERCENTILE(expr, )`` - -Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``). - -Example:: - - ppl> source=accounts | stats perc99.5(age); - fetched rows / total rows = 1/1 - +---------------+ - | perc99.5(age) | - |---------------| - | 36 | - +---------------+ - - ppl> source=accounts | stats p50(age); - fetched rows / total rows = 1/1 - +---------+ - | p50(age) | - |---------| - | 32 | - +---------+ - -MEDIAN ------- - -Description ->>>>>>>>>>> - -Version: 3.3.0 - -Usage: MEDIAN(expr). Returns the median (50th percentile) value of `expr`. This is equivalent to ``PERCENTILE(expr, 50)``. - -Note: This function requires Calcite to be enabled (see `Configuration`_ section above). 
- -Example:: - - os> source=accounts | stats median(age); - fetched rows / total rows = 1/1 - +-------------+ - | median(age) | - |-------------| - | 33 | - +-------------+ - -EARLIEST --------- - -Description ->>>>>>>>>>> - -Version: 3.3.0 - -Usage: EARLIEST(field [, time_field]). Return the earliest value of a field based on timestamp ordering. - -* field: mandatory. The field to return the earliest value for. -* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -Note: This function requires Calcite to be enabled (see `Configuration`_ section above). - -Example:: - - os> source=events | stats earliest(message) by host | sort host; - fetched rows / total rows = 2/2 - +-------------------+---------+ - | earliest(message) | host | - |-------------------+---------| - | Starting up | server1 | - | Initializing | server2 | - +-------------------+---------+ - -Example with custom time field:: - - os> source=events | stats earliest(status, event_time) by category | sort category; - fetched rows / total rows = 2/2 - +------------------------------+----------+ - | earliest(status, event_time) | category | - |------------------------------+----------| - | pending | orders | - | active | users | - +------------------------------+----------+ - -LATEST ------- - -Description ->>>>>>>>>>> - -Version: 3.3.0 - -Usage: LATEST(field [, time_field]). Return the latest value of a field based on timestamp ordering. - -* field: mandatory. The field to return the latest value for. -* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -Note: This function requires Calcite to be enabled (see `Configuration`_ section above). - -Example:: - - os> source=events | stats latest(message) by host | sort host; - fetched rows / total rows = 2/2 - +------------------+---------+ - | latest(message) | host | - |------------------+---------| - | Shutting down | server1 | - | Maintenance mode | server2 | - +------------------+---------+ - -Example with custom time field:: - - os> source=events | stats latest(status, event_time) by category | sort category; - fetched rows / total rows = 2/2 - +----------------------------+----------+ - | latest(status, event_time) | category | - |----------------------------+----------| - | cancelled | orders | - | inactive | users | - +----------------------------+----------+ - -FIRST ------ - -Description ->>>>>>>>>>> - -Version: 3.3.0 - -Usage: FIRST(field). Return the first non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. - -* field: mandatory. The field to return the first value for. - -Note: This function requires Calcite to be enabled (see `Configuration`_ section above). - -Example:: - - os> source=accounts | stats first(firstname) by gender; - fetched rows / total rows = 2/2 - +------------------+--------+ - | first(firstname) | gender | - |------------------+--------| - | Nanette | F | - | Amber | M | - +------------------+--------+ - -Example with count aggregation:: - - os> source=accounts | stats first(firstname), count() by gender; - fetched rows / total rows = 2/2 - +------------------+---------+--------+ - | first(firstname) | count() | gender | - |------------------+---------+--------| - | Nanette | 1 | F | - | Amber | 3 | M | - +------------------+---------+--------+ - -LAST ----- - -Description ->>>>>>>>>>> - -Version: 3.3.0 - -Usage: LAST(field). 
Return the last non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. - -* field: mandatory. The field to return the last value for. - -Note: This function requires Calcite to be enabled (see `Configuration`_ section above). - -Example:: - - os> source=accounts | stats last(firstname) by gender; - fetched rows / total rows = 2/2 - +-----------------+--------+ - | last(firstname) | gender | - |-----------------+--------| - | Nanette | F | - | Dale | M | - +-----------------+--------+ - -Example with different fields:: - - os> source=accounts | stats first(account_number), last(balance), first(age); - fetched rows / total rows = 1/1 - +-----------------------+---------------+------------+ - | first(account_number) | last(balance) | first(age) | - |-----------------------+---------------+------------| - | 1 | 4180 | 32 | - +-----------------------+---------------+------------+ - -LIST ----- - -Description ->>>>>>>>>>> - -Version: 3.3.0 (Calcite engine only) - -Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved. -The function returns up to 100 values with no guaranteed ordering. - -* expr: The field expression to collect values from. -* This aggregation function doesn't support Array, Struct, Object field types. - -Example with string fields:: - - PPL> source=accounts | stats list(firstname); - fetched rows / total rows = 1/1 - +-------------------------------------+ - | list(firstname) | - |-------------------------------------|` - | ["Amber","Hattie","Nanette","Dale"] | - +-------------------------------------+ - -Example with result field rename:: - - PPL> source=accounts | stats list(firstname) as names; - fetched rows / total rows = 1/1 - +-------------------------------------+ - | names | - |-------------------------------------| - | ["Amber","Hattie","Nanette","Dale"] | - +-------------------------------------+ - -VALUES ------- - -Description ->>>>>>>>>>> - -Version: 3.3.0 (Calcite engine only) - -Usage: VALUES(expr). Collects all unique values from the specified expression into a sorted array. Values are converted to strings, nulls are filtered, and duplicates are removed. 
- -The maximum number of unique values returned is controlled by the ``plugins.ppl.values.max.limit`` setting: - -* Default value is 0, which means unlimited values are returned -* Can be configured to any positive integer to limit the number of unique values -* See the `PPL Settings <../admin/settings.rst#plugins-ppl-values-max-limit>`_ documentation for more details - -Example with string fields:: - - PPL> source=accounts | stats values(firstname); - fetched rows / total rows = 1/1 - +-------------------------------------+ - | values(firstname) | - |-------------------------------------| - | ["Amber","Dale","Hattie","Nanette"] | - +-------------------------------------+ - -Example with numeric fields (sorted as strings):: - - PPL> source=accounts | stats values(age); - fetched rows / total rows = 1/1 - +---------------------------+ - | values(age) | - |---------------------------| - | ["28","32","33","36","39"] | - +---------------------------+ - -Example with result field rename:: - - PPL> source=accounts | stats values(firstname) as unique_names; - fetched rows / total rows = 1/1 - +-------------------------------------+ - | unique_names | - |-------------------------------------| - | ["Amber","Dale","Hattie","Nanette"] | - +-------------------------------------+ +The stats command supports the following aggregation functions: + +* COUNT/C: Count of values +* SUM: Sum of numeric values +* AVG: Average of numeric values +* MAX: Maximum value +* MIN: Minimum value +* VAR_SAMP: Sample variance +* VAR_POP: Population variance +* STDDEV_SAMP: Sample standard deviation +* STDDEV_POP: Population standard deviation +* DISTINCT_COUNT_APPROX: Approximate distinct count +* TAKE: List of original values +* PERCENTILE/PERCENTILE_APPROX: Percentile calculations +* PERC/P: Percentile shortcut functions +* MEDIAN: 50th percentile +* EARLIEST: Earliest value by timestamp +* LATEST: Latest value by timestamp +* FIRST: First non-null value +* LAST: Last non-null value +* LIST: Collect all values into array +* VALUES: Collect unique values into sorted array + +For detailed documentation of each function, see `Aggregation Functions <../functions/aggregations.rst>`_. Example 1: Calculate the count of events ======================================== -The example show calculate the count of events in the accounts. +This example shows calculating the count of events in the accounts. PPL query:: @@ -671,7 +87,7 @@ PPL query:: Example 2: Calculate the average of a field =========================================== -The example show calculate the average age of all the accounts. +This example shows calculating the average age of all the accounts. PPL query:: @@ -687,7 +103,7 @@ PPL query:: Example 3: Calculate the average of a field by group ==================================================== -The example show calculate the average age of all the accounts group by gender. +This example shows calculating the average age of all the accounts grouped by gender. PPL query:: @@ -704,7 +120,7 @@ PPL query:: Example 4: Calculate the average, sum and count of a field by group =================================================================== -The example show calculate the average age, sum age and count of events of all the accounts group by gender. +This example shows calculating the average age, sum of age, and count of events of all the accounts grouped by gender.
PPL query:: @@ -830,7 +246,7 @@ PPL query:: Example 11: Calculate the percentile of a field =============================================== -The example show calculate the percentile 90th age of all the accounts. +This example shows calculating the 90th percentile of the age of all the accounts. PPL query:: @@ -846,7 +262,7 @@ PPL query:: Example 12: Calculate the percentile of a field by group ======================================================== -The example show calculate the percentile 90th age of all the accounts group by gender. +This example shows calculating the 90th percentile of the age of all the accounts grouped by gender. PPL query:: @@ -894,7 +310,6 @@ PPL query:: Example 15: Ignore null bucket ============================== -Note: This argument requires version 3.3.0 or above. PPL query:: diff --git a/docs/user/ppl/cmd/streamstats.rst b/docs/user/ppl/cmd/streamstats.rst index 0ac18637fec..ccda3383277 100644 --- a/docs/user/ppl/cmd/streamstats.rst +++ b/docs/user/ppl/cmd/streamstats.rst @@ -24,22 +24,29 @@ Difference between ``stats``, ``eventstats`` and ``streamstats`` All of these commands can be used to generate aggregations such as average, sum, and maximum, but they have some key differences in how they operate and what they produce: -* Transformation Behavior: - * ``stats``: Transforms all events into an aggregated result table, losing original event structure. - * ``eventstats``: Adds aggregation results as new fields to the original events without removing the event structure. - * ``streamstats``: Adds cumulative (running) aggregation results to each event as they stream through the pipeline. -* Output Format: - * ``stats``: Output contains only aggregated values. Original raw events are not preserved. - * ``eventstats``: Original events remain, with extra fields containing summary statistics. - * ``streamstats``: Original events remain, with extra fields containing running totals or cumulative statistics. -* Aggregation Scope: - * ``stats``: Based on all events in the search (or groups defined by BY clause). - * ``eventstats``: Based on all relevant events, then the result is added back to each event in the group. - * ``streamstats``: Calculations occur progressively as each event is processed; can be scoped by window. -* Use Cases: - * ``stats``: When only aggregated results are needed (e.g., counts, averages, sums). - * ``eventstats``: When aggregated statistics are needed alongside original event data. - * ``streamstats``: When a running total or cumulative statistic is needed across event streams. +* Transformation Behavior + + * ``stats``: Transforms all events into an aggregated result table, losing original event structure. + * ``eventstats``: Adds aggregation results as new fields to the original events without removing the event structure. + * ``streamstats``: Adds cumulative (running) aggregation results to each event as they stream through the pipeline. + +* Output Format + + * ``stats``: Output contains only aggregated values. Original raw events are not preserved. + * ``eventstats``: Original events remain, with extra fields containing summary statistics. + * ``streamstats``: Original events remain, with extra fields containing running totals or cumulative statistics. + +* Aggregation Scope + + * ``stats``: Based on all events in the search (or groups defined by BY clause). + * ``eventstats``: Based on all relevant events, then the result is added back to each event in the group.
+ * ``streamstats``: Calculations occur progressively as each event is processed; can be scoped by window. + +* Use Cases + + * ``stats``: When only aggregated results are needed (e.g., counts, averages, sums). + * ``eventstats``: When aggregated statistics are needed alongside original event data. + * ``streamstats``: When a running total or cumulative statistic is needed across event streams. Syntax ====== @@ -53,7 +60,9 @@ streamstats [current=] [window=] [global=] [reset_before="("")". **Default:** false. * by-clause: optional. The by clause could be the fields and expressions like scalar functions and aggregation functions. Besides, the span clause can be used to split specific field into buckets in the same interval, the stats then does the aggregation by these span buckets. Syntax: by [span-expression,] [field,]... **Default:** If no is specified, all events are processed as a single group and running statistics are computed across the entire event stream. * span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). For example, ``span(age, 10)`` creates 10-year age buckets, ``span(timestamp, 1h)`` creates hourly buckets. - * Available time units: + + * Available time units + * millisecond (ms) * second (s) * minute (m, case sensitive) @@ -82,7 +91,7 @@ The streamstats command supports the following aggregation functions: * EARLIEST: Earliest value by timestamp * LATEST: Latest value by timestamp -For detailed documentation of each function, see `Aggregation Functions <../functions/aggregation.rst>`_. +For detailed documentation of each function, see `Aggregation Functions <../functions/aggregations.rst>`_. Usage ===== diff --git a/docs/user/ppl/cmd/subquery.rst b/docs/user/ppl/cmd/subquery.rst index f7883202c70..48491db22e2 100644 --- a/docs/user/ppl/cmd/subquery.rst +++ b/docs/user/ppl/cmd/subquery.rst @@ -1,6 +1,6 @@ -============= -subquery (aka subsearch) -============= +======== +subquery +======== .. rubric:: Table of contents @@ -10,66 +10,45 @@ subquery (aka subsearch) Description -============ -| (Experimental) -| (From 3.0.0) -| The subquery (aka subsearch) commands contain 4 types: ``InSubquery``, ``ExistsSubquery``, ``ScalarSubquery`` and ``RelationSubquery``. The first three are expressions, they are used in WHERE clause (``where ``) and search filter(``search source=* ``). ``RelationSubquery`` is not an expression, it is a statement. +=========== +| The ``subquery`` command allows you to embed one PPL query inside another, enabling complex filtering and data retrieval operations. A subquery is a nested query that executes first and returns results that are used by the outer query for filtering, comparison, or joining operations. -Version -======= -3.0.0 +| Subqueries are useful for: + +1. Filtering data based on results from another query +2. Checking for the existence of related data +3. Performing calculations that depend on aggregated values from other tables +4. Creating complex joins with dynamic conditions Syntax ====== -Subquery (aka subsearch) has the same syntax with search command, except that it must be enclosed in square brackets. +subquery: [ source=... | ... | ... ] -InSubquery:: +Subqueries use the same syntax as regular PPL queries but must be enclosed in square brackets. There are four main types of subqueries: + +**IN Subquery** +Tests whether a field value exists in the results of a subquery:: where [not] in [ source=... | ... | ... 
] -ExistsSubquery:: +**EXISTS Subquery** +Tests whether a subquery returns any results:: where [not] exists [ source=... | ... | ... ] -ScalarSubquery:: +**Scalar Subquery** +Returns a single value that can be used in comparisons or calculations:: where = [ source=... | ... | ... ] -RelationSubquery:: +**Relation Subquery** +Used in join operations to provide dynamic right-side data:: | join ON condition [ source=... | ... | ... ] - Configuration ============= -plugins.calcite.enabled ------------------------ - -This command requires Calcite enabled. In 3.0.0-beta, as an experimental the Calcite configuration is disabled by default. - -Enable Calcite:: - - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "transient" : { - "plugins.calcite.enabled" : true - } - }' - -Result set:: - - { - "acknowledged": true, - "persistent": { - "plugins": { - "calcite": { - "enabled": "true" - } - } - }, - "transient": {} - } - plugins.ppl.subsearch.maxout ---------------------------- @@ -94,7 +73,6 @@ Change the subsearch.maxout to unlimited:: "transient": {} } - Usage ===== @@ -162,11 +140,11 @@ RelationSubquery:: source = table1 | join left = l right = r on condition [ source = table2 | where d > 10 | head 5 ] //subquery in join right side source = [ source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] | stats count(a) by b ] as outer | head 1 - - Example 1: TPC-H q20 ==================== +This example shows a complex TPC-H query 20 implementation using nested subqueries. + PPL query:: >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ @@ -199,6 +177,8 @@ PPL query:: Example 2: TPC-H q22 ==================== +This example shows a TPC-H query 22 implementation using EXISTS and scalar subqueries. + PPL query:: >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ diff --git a/docs/user/ppl/cmd/syntax.rst b/docs/user/ppl/cmd/syntax.rst index 45ffea8ff67..c15aad68e15 100644 --- a/docs/user/ppl/cmd/syntax.rst +++ b/docs/user/ppl/cmd/syntax.rst @@ -1,6 +1,6 @@ -============= +====== Syntax -============= +====== .. rubric:: Table of contents diff --git a/docs/user/ppl/cmd/table.rst b/docs/user/ppl/cmd/table.rst index f8ecbb11f18..3512a648a1c 100644 --- a/docs/user/ppl/cmd/table.rst +++ b/docs/user/ppl/cmd/table.rst @@ -1,6 +1,6 @@ -============= +===== table -============= +===== .. rubric:: Table of contents @@ -10,26 +10,20 @@ table Description -============ +=========== The ``table`` command is an alias for the `fields `_ command and provides the same field selection capabilities. It allows you to keep or remove fields from the search result using enhanced syntax options. -Note: The ``table`` command requires the Calcite to be enabled. All enhanced field features are available through this command. For detailed examples and documentation of all enhanced features, see the `fields command documentation `_. - -Version -======= -3.3.0 - Syntax -============ +====== table [+|-] -* index: optional. if the plus (+) is used, only the fields specified in the field list will be keep. if the minus (-) is used, all the fields specified in the field list will be removed. **Default** + -* field list: mandatory. Fields can be specified using various enhanced syntax options. +* [+|-]: optional. If the plus (+) is used, only the fields specified in the field list will be kept. If the minus (-) is used, all the fields specified in the field list will be removed. 
**Default:** +. +* field-list: mandatory. Comma-delimited or space-delimited list of fields to keep or remove. Supports wildcard patterns. Example 1: Basic table command usage -------------------------------------- +==================================== -The ``table`` command works identically to the ``fields`` command. This example shows basic field selection. +This example shows basic field selection using the table command. PPL query:: @@ -44,20 +38,7 @@ PPL query:: | Dale | Adams | 33 | +-----------+----------+-----+ -Enhanced Features -================= - -The ``table`` command supports all enhanced features available in the ``fields`` command, including: - -- Space-delimited syntax -- Wildcard pattern matching (prefix, suffix, contains) -- Mixed delimiters -- Field deduplication -- Full wildcard selection -- Wildcard exclusion -Requirements -============ -- **Calcite Engine**: The ``table`` command requires the Calcite engine to be enabled -- **Feature Parity**: All enhanced features available in ``fields`` are also available in ``table`` -- **Error Handling**: Attempting to use the ``table`` command without Calcite will result in an ``UnsupportedOperationException`` \ No newline at end of file +See Also +======== +- `fields `_ - Alias command with identical functionality \ No newline at end of file diff --git a/docs/user/ppl/cmd/timechart.rst b/docs/user/ppl/cmd/timechart.rst index 6aea1270f1e..f336007d8fc 100644 --- a/docs/user/ppl/cmd/timechart.rst +++ b/docs/user/ppl/cmd/timechart.rst @@ -1,6 +1,6 @@ -============= +========= timechart -============= +========= .. rubric:: Table of contents @@ -10,25 +10,16 @@ timechart Description -============ +=========== | The ``timechart`` command creates a time-based aggregation of data. It groups data by time intervals and optionally by a field, then applies an aggregation function to each group. The results are returned in an unpivoted format with separate rows for each time-field combination. -Version -======= -3.3.0 - Syntax -============ - -.. code-block:: text +====== - timechart [span=] [limit=] [useother=] [by ] +timechart [span=] [limit=] [useother=] [by ] -**Parameters:** +* span: optional. Specifies the time interval for grouping data. **Default:** 1m (1 minute). -* **span**: optional. Specifies the time interval for grouping data. - - * Default: 1m (1 minute) * Available time units: * millisecond (ms) @@ -41,31 +32,27 @@ Syntax * quarter (q) * year (y) -* **limit**: optional. Specifies the maximum number of distinct values to display when using the "by" clause. +* limit: optional. Specifies the maximum number of distinct values to display when using the "by" clause. **Default:** 10. - * Default: 10 * When there are more distinct values than the limit, the additional values are grouped into an "OTHER" category if useother is not set to false. * The "most distinct" values are determined by calculating the sum of the aggregation values across all time intervals for each distinct field value. The top N values with the highest sums are displayed individually, while the rest are grouped into the "OTHER" category. * Set to 0 to show all distinct values without any limit (when limit=0, useother is automatically set to false). * The parameters can be specified in any order before the aggregation function. * Only applies when using the "by" clause to group results. -* **useother**: optional. Controls whether to create an "OTHER" category for values beyond the limit. +* useother: optional. 
Controls whether to create an "OTHER" category for values beyond the limit. **Default:** true.
 
-  * Default: true
   * When set to false, only the top N values (based on limit) are shown without an "OTHER" column.
   * When set to true, values beyond the limit are grouped into an "OTHER" category.
   * Only applies when using the "by" clause and when there are more distinct values than the limit.
 
-* **by**: optional. Groups the results by the specified field in addition to time intervals.
-
-  * If not specified, the aggregation is performed across all documents in each time interval.
-
-* **aggregation_function**: mandatory. The aggregation function to apply to each time bucket.
+* aggregation_function: mandatory. The aggregation function to apply to each time bucket.
 
   * Currently, only a single aggregation function is supported.
   * Available functions: All aggregation functions supported by the :doc:`stats ` command, as well as the timechart-specific aggregations listed below.
 
+* by: optional. Groups the results by the specified field in addition to time intervals. If not specified, the aggregation is performed across all documents in each time interval.
+
 PER_SECOND
 ----------
 
@@ -114,15 +101,6 @@ Notes
 
 * **Null values**: Documents with null values in the "by" field are treated as a separate category and appear as null in the results.
 
-Limitations
-============
-* Only a single aggregation function is supported per timechart command.
-* The ``bins`` parameter and other bin options are not supported since the ``bin`` command is not implemented yet. Use the ``span`` parameter to control time intervals.
-
-
-Examples
-========
-
 Example 1: Count events by hour
 ===============================
 
@@ -363,3 +341,9 @@ PPL query::
    | 2023-01-01 10:30:00 | server1 | 0.1                 |
    | 2023-01-01 10:30:00 | server2 | 0.05                |
    +---------------------+---------+---------------------+
+
+Limitations
+===========
+* Only a single aggregation function is supported per timechart command.
+* The ``bins`` parameter and other bin options are not supported since the ``bin`` command is not implemented yet. Use the ``span`` parameter to control time intervals.
+
diff --git a/docs/user/ppl/cmd/top.rst b/docs/user/ppl/cmd/top.rst
index a786d7ed9a9..bdf22addf40 100644
--- a/docs/user/ppl/cmd/top.rst
+++ b/docs/user/ppl/cmd/top.rst
@@ -11,30 +11,29 @@ top
 Description
 ===========
-| Using ``top`` command to find the most common tuple of values of all fields in the field list.
-
+| The ``top`` command finds the most common tuple of values of all fields in the field list.
 
 Syntax
 ======
-top [N] [by-clause]
+top [N] [top-options] [by-clause]
+
+* N: optional. Number of results to return. **Default**: 10
+* top-options: optional. Options for the top command. Supported syntax is [countfield=] [showcount=].
-top [N] [top-options] [by-clause] ``(available from 3.1.0+)``
+
+  * showcount=: optional. Whether to create a field in the output that represents the count of the tuple of values. **Default:** true.
+  * countfield=: optional. The name of the field that contains the count. **Default:** 'count'.
+  * usenull=: optional (since 3.4.0). Whether to output the null value. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``.
+
+    * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true``
+    * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false``
 
-* N: number of results to return. **Default**: 10
 * field-list: mandatory. comma-delimited list of field names.
 * by-clause: optional.
one or more fields to group the results by. -* top-options: optional. options for the top command. Supported syntax is [countfield=] [showcount=]. -* showcount=: optional. whether to create a field in output that represent a count of the tuple of values. Default value is ``true``. -* countfield=: optional. the name of the field that contains count. Default value is ``'count'``. -* usenull=: optional (since 3.4.0). whether to output the null value. The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``: - - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` Example 1: Find the most common values in a field ================================================= -The example finds most common gender of all the accounts. +This example finds the most common gender of all the accounts. PPL query:: @@ -47,10 +46,10 @@ PPL query:: | F | +--------+ -Example 2: Find the most common values in a field -================================================= +Example 2: Limit results to top N values +======================================== -The example finds most common gender of all the accounts. +This example finds the most common gender and limits results to 1 value. PPL query:: @@ -62,10 +61,10 @@ PPL query:: | M | +--------+ -Example 2: Find the most common values organized by gender -========================================================== +Example 3: Find the most common values grouped by field +======================================================= -The example finds most common age of all the accounts group by gender. +This example finds the most common age of all the accounts grouped by gender. PPL query:: @@ -78,8 +77,10 @@ PPL query:: | M | 32 | +--------+-----+ -Example 3: Top command with Calcite enabled -=========================================== +Example 4: Top command with count field +======================================= + +This example finds the most common gender of all the accounts and includes the count. PPL query:: @@ -93,9 +94,11 @@ PPL query:: +--------+-------+ -Example 4: Specify the count field option +Example 5: Specify the count field option ========================================= +This example specifies a custom name for the count field. + PPL query:: os> source=accounts | top countfield='cnt' gender; diff --git a/docs/user/ppl/cmd/trendline.rst b/docs/user/ppl/cmd/trendline.rst index d7fb6544ae6..e2fd067d262 100644 --- a/docs/user/ppl/cmd/trendline.rst +++ b/docs/user/ppl/cmd/trendline.rst @@ -1,6 +1,6 @@ -============= +========= trendline -============= +========= .. rubric:: Table of contents @@ -10,41 +10,25 @@ trendline Description -============ -| Using ``trendline`` command to calculate moving averages of fields. +=========== +| The ``trendline`` command calculates moving averages of fields. Syntax -============ -`TRENDLINE [sort <[+|-] sort-field>] [SMA|WMA](number-of-datapoints, field) [AS alias] [[SMA|WMA](number-of-datapoints, field) [AS alias]]...` +====== +trendline [sort <[+|-] sort-field>] [sma|wma](number-of-datapoints, field) [as ] [[sma|wma](number-of-datapoints, field) [as ]]... * [+|-]: optional. The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. * sort-field: mandatory when sorting is used. The field used to sort. +* sma|wma: mandatory. 
Simple Moving Average (sma) applies equal weighting to all values, Weighted Moving Average (wma) applies greater weight to more recent values. * number-of-datapoints: mandatory. The number of datapoints to calculate the moving average (must be greater than zero). * field: mandatory. The name of the field the moving average should be calculated for. -* alias: optional. The name of the resulting column containing the moving average (defaults to the field name with "_trendline"). - -Starting with version 3.1.0, two trendline algorithms are supported, aka Simple Moving Average (SMA) and Weighted Moving Average (WMA). - -Suppose: - -* f[i]: The value of field 'f' in the i-th data-point -* n: The number of data-points in the moving window (period) -* t: The current time index - -SMA is calculated like - - SMA(t) = (1/n) * Σ(f[i]), where i = t-n+1 to t - -WMA places more weights on recent values compared to equal-weighted SMA algorithm - - WMA(t) = (1/(1 + 2 + ... + n)) * Σ(1 * f[i-n+1] + 2 * f[t-n+2] + ... + n * f[t]) - = (2/(n * (n + 1))) * Σ((i - t + n) * f[i]), where i = t-n+1 to t +* alias: optional. The name of the resulting column containing the moving average. **Default:** field name with "_trendline". Example 1: Calculate the simple moving average on one field. -===================================================== +============================================================ -The example shows how to calculate the simple moving average on one field. +This example shows how to calculate the simple moving average on one field. PPL query:: @@ -61,9 +45,9 @@ PPL query:: Example 2: Calculate the simple moving average on multiple fields. -=========================================================== +================================================================== -The example shows how to calculate the simple moving average on multiple fields. +This example shows how to calculate the simple moving average on multiple fields. PPL query:: @@ -79,9 +63,9 @@ PPL query:: +------+-----------+ Example 3: Calculate the simple moving average on one field without specifying an alias. -================================================================================= +======================================================================================== -The example shows how to calculate the simple moving average on one field. +This example shows how to calculate the simple moving average on one field. PPL query:: @@ -97,25 +81,9 @@ PPL query:: +--------------------------+ Example 4: Calculate the weighted moving average on one field. -================================================================================= - -Version -------- -3.1.0 - -Configuration -------------- -wma algorithm requires Calcite enabled. - -Enable Calcite: - - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "persistent" : { - "plugins.calcite.enabled" : true - } - }' +============================================================== -The example shows how to calculate the weighted moving average on one field. +This example shows how to calculate the weighted moving average on one field. PPL query:: @@ -132,4 +100,4 @@ PPL query:: Limitations =========== -Starting with version 3.1.0, the ``trendline`` command requires all values in the specified ``field`` to be non-null. Any rows with null values present in the calculation field will be automatically excluded from the command's output. 
\ No newline at end of file +The ``trendline`` command requires all values in the specified ``field`` to be non-null. Any rows with null values present in the calculation field will be automatically excluded from the command's output. \ No newline at end of file diff --git a/docs/user/ppl/cmd/where.rst b/docs/user/ppl/cmd/where.rst index 9bdb8a75aa3..324af4dcb54 100644 --- a/docs/user/ppl/cmd/where.rst +++ b/docs/user/ppl/cmd/where.rst @@ -1,6 +1,6 @@ -============= +===== where -============= +===== .. rubric:: Table of contents @@ -11,22 +11,18 @@ where Description =========== -| The ``where`` command bool-expression to filter the search result. The ``where`` command only return the result when bool-expression evaluated to true. - +| The ``where`` command filters the search result. The ``where`` command only returns the result when the bool-expression evaluates to true. Syntax ====== where -* bool-expression: optional. any expression which could be evaluated to boolean value. - -Examples -======== +* bool-expression: optional. Any expression which could be evaluated to boolean value. Example 1: Filter result set with condition --------------------------------------------- +=========================================== -The example show fetch all the document from accounts index with . +This example shows fetching all the documents from the accounts index where account_number is 1 or gender is "F". PPL query:: @@ -40,7 +36,7 @@ PPL query:: +----------------+--------+ Example 2: Basic Field Comparison ----------------------------------- +================================= The example shows how to filter accounts with balance greater than 30000. @@ -56,7 +52,7 @@ PPL query:: +----------------+---------+ Example 3: Pattern Matching with LIKE --------------------------------------- +===================================== Pattern Matching with Underscore (_) @@ -87,7 +83,7 @@ PPL query:: +----------------+-------+ Example 4: Multiple Conditions -------------------------------- +============================== The example shows how to combine multiple conditions using AND operator. @@ -104,7 +100,7 @@ PPL query:: +----------------+-----+--------+ Example 5: Using IN Operator ------------------------------ +============================ The example demonstrates using IN operator to match multiple values. @@ -120,7 +116,7 @@ PPL query:: +----------------+-------+ Example 6: NULL Checks ----------------------- +====================== The example shows how to filter records with NULL values. @@ -135,7 +131,7 @@ PPL query:: +----------------+----------+ Example 7: Complex Conditions ------------------------------- +============================= The example demonstrates combining multiple conditions with parentheses and logical operators. @@ -150,7 +146,7 @@ PPL query:: +----------------+---------+-----+--------+ Example 8: NOT Conditions --------------------------- +========================= The example shows how to use NOT operator to exclude matching records. diff --git a/docs/user/ppl/functions/aggregations.rst b/docs/user/ppl/functions/aggregations.rst new file mode 100644 index 00000000000..6605bda0765 --- /dev/null +++ b/docs/user/ppl/functions/aggregations.rst @@ -0,0 +1,522 @@ +===================== +Aggregation Functions +===================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Aggregation functions perform calculations across multiple rows to return a single result value. 
These functions are used with ``stats`` and ``eventstats`` commands to analyze and summarize data. + +| The following table shows how NULL/MISSING values are handled by aggregation functions: + ++----------+-------------+-------------+ +| Function | NULL | MISSING | ++----------+-------------+-------------+ +| COUNT | Not counted | Not counted | ++----------+-------------+-------------+ +| SUM | Ignore | Ignore | ++----------+-------------+-------------+ +| AVG | Ignore | Ignore | ++----------+-------------+-------------+ +| MAX | Ignore | Ignore | ++----------+-------------+-------------+ +| MIN | Ignore | Ignore | ++----------+-------------+-------------+ +| FIRST | Ignore | Ignore | ++----------+-------------+-------------+ +| LAST | Ignore | Ignore | ++----------+-------------+-------------+ +| LIST | Ignore | Ignore | ++----------+-------------+-------------+ +| VALUES | Ignore | Ignore | ++----------+-------------+-------------+ + +Functions +========= + +COUNT +----- + +Description +>>>>>>>>>>> + +Usage: Returns a count of the number of expr in the rows retrieved. The ``C()`` function, ``c``, and ``count`` can be used as abbreviations for ``COUNT()``. To perform a filtered counting, wrap the condition to satisfy in an `eval` expression. + +Example:: + + os> source=accounts | stats count(), c(), count, c; + fetched rows / total rows = 1/1 + +---------+-----+-------+---+ + | count() | c() | count | c | + |---------+-----+-------+---| + | 4 | 4 | 4 | 4 | + +---------+-----+-------+---+ + +Example of filtered counting:: + + os> source=accounts | stats count(eval(age > 30)) as mature_users; + fetched rows / total rows = 1/1 + +--------------+ + | mature_users | + |--------------| + | 3 | + +--------------+ + +SUM +--- + +Description +>>>>>>>>>>> + +Usage: SUM(expr). Returns the sum of expr. + +Example:: + + os> source=accounts | stats sum(age) by gender; + fetched rows / total rows = 2/2 + +----------+--------+ + | sum(age) | gender | + |----------+--------| + | 28 | F | + | 101 | M | + +----------+--------+ + +AVG +--- + +Description +>>>>>>>>>>> + +Usage: AVG(expr). Returns the average value of expr. + +Example:: + + os> source=accounts | stats avg(age) by gender; + fetched rows / total rows = 2/2 + +--------------------+--------+ + | avg(age) | gender | + |--------------------+--------| + | 28.0 | F | + | 33.666666666666664 | M | + +--------------------+--------+ + +MAX +--- + +Description +>>>>>>>>>>> + +Usage: MAX(expr). Returns the maximum value of expr. + +For non-numeric fields, values are sorted lexicographically. + +Example:: + + os> source=accounts | stats max(age); + fetched rows / total rows = 1/1 + +----------+ + | max(age) | + |----------| + | 36 | + +----------+ + +Example with text field:: + + os> source=accounts | stats max(firstname); + fetched rows / total rows = 1/1 + +----------------+ + | max(firstname) | + |----------------| + | Nanette | + +----------------+ + +MIN +--- + +Description +>>>>>>>>>>> + +Usage: MIN(expr). Returns the minimum value of expr. + +For non-numeric fields, values are sorted lexicographically. + +Example:: + + os> source=accounts | stats min(age); + fetched rows / total rows = 1/1 + +----------+ + | min(age) | + |----------| + | 28 | + +----------+ + +Example with text field:: + + os> source=accounts | stats min(firstname); + fetched rows / total rows = 1/1 + +----------------+ + | min(firstname) | + |----------------| + | Amber | + +----------------+ + +VAR_SAMP +-------- + +Description +>>>>>>>>>>> + +Usage: VAR_SAMP(expr). 
Returns the sample variance of expr.
+
+Example::
+
+    os> source=accounts | stats var_samp(age);
+    fetched rows / total rows = 1/1
+    +--------------------+
+    | var_samp(age)      |
+    |--------------------|
+    | 10.916666666666666 |
+    +--------------------+
+
+VAR_POP
+-------
+
+Description
+>>>>>>>>>>>
+
+Usage: VAR_POP(expr). Returns the population variance of expr.
+
+Example::
+
+    os> source=accounts | stats var_pop(age);
+    fetched rows / total rows = 1/1
+    +--------------+
+    | var_pop(age) |
+    |--------------|
+    | 8.1875       |
+    +--------------+
+
+STDDEV_SAMP
+-----------
+
+Description
+>>>>>>>>>>>
+
+Usage: STDDEV_SAMP(expr). Returns the sample standard deviation of expr.
+
+Example::
+
+    os> source=accounts | stats stddev_samp(age);
+    fetched rows / total rows = 1/1
+    +-------------------+
+    | stddev_samp(age)  |
+    |-------------------|
+    | 3.304037933599835 |
+    +-------------------+
+
+STDDEV_POP
+----------
+
+Description
+>>>>>>>>>>>
+
+Usage: STDDEV_POP(expr). Returns the population standard deviation of expr.
+
+Example::
+
+    os> source=accounts | stats stddev_pop(age);
+    fetched rows / total rows = 1/1
+    +--------------------+
+    | stddev_pop(age)    |
+    |--------------------|
+    | 2.8613807855648994 |
+    +--------------------+
+
+DISTINCT_COUNT, DC
+------------------
+
+Description
+>>>>>>>>>>>
+
+Usage: DISTINCT_COUNT(expr), DC(expr). Returns the approximate number of distinct values using the HyperLogLog++ algorithm. Both functions are equivalent.
+
+For details on algorithm accuracy and precision control, see the `OpenSearch Cardinality Aggregation documentation `_.
+
+Example::
+
+    os> source=accounts | stats dc(state) as distinct_states, distinct_count(state) as dc_states_alt by gender;
+    fetched rows / total rows = 2/2
+    +-----------------+---------------+--------+
+    | distinct_states | dc_states_alt | gender |
+    |-----------------+---------------+--------|
+    | 3               | 3             | M      |
+    | 1               | 1             | F      |
+    +-----------------+---------------+--------+
+
+DISTINCT_COUNT_APPROX
+---------------------
+
+Description
+>>>>>>>>>>>
+
+Usage: DISTINCT_COUNT_APPROX(expr). Returns the approximate distinct count of expr, using the HyperLogLog++ algorithm.
+
+Example::
+
+    PPL> source=accounts | stats distinct_count_approx(gender);
+    fetched rows / total rows = 1/1
+    +-------------------------------+
+    | distinct_count_approx(gender) |
+    |-------------------------------|
+    | 2                             |
+    +-------------------------------+
+
+EARLIEST
+--------
+
+Description
+>>>>>>>>>>>
+
+Usage: EARLIEST(field [, time_field]). Returns the earliest value of a field based on timestamp ordering.
+
+* field: mandatory. The field to return the earliest value for.
+* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified.
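+
+As a quick illustration of how ``EARLIEST`` pairs with ``LATEST``, the two can be combined in a single ``stats`` call to bracket the first and last values per group. The sketch below is illustrative only; it assumes the same hypothetical ``events`` index used in the examples that follow, and output is omitted::
+
+    source=events | stats earliest(message) as first_msg, latest(message) as last_msg by host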
+
+Example::
+
+    os> source=events | stats earliest(message) by host | sort host;
+    fetched rows / total rows = 2/2
+    +-------------------+---------+
+    | earliest(message) | host    |
+    |-------------------+---------|
+    | Starting up       | server1 |
+    | Initializing      | server2 |
+    +-------------------+---------+
+
+Example with custom time field::
+
+    os> source=events | stats earliest(status, event_time) by category | sort category;
+    fetched rows / total rows = 2/2
+    +------------------------------+----------+
+    | earliest(status, event_time) | category |
+    |------------------------------+----------|
+    | pending                      | orders   |
+    | active                       | users    |
+    +------------------------------+----------+
+
+LATEST
+------
+
+Description
+>>>>>>>>>>>
+
+Usage: LATEST(field [, time_field]). Returns the latest value of a field based on timestamp ordering.
+
+* field: mandatory. The field to return the latest value for.
+* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified.
+
+Example::
+
+    os> source=events | stats latest(message) by host | sort host;
+    fetched rows / total rows = 2/2
+    +------------------+---------+
+    | latest(message)  | host    |
+    |------------------+---------|
+    | Shutting down    | server1 |
+    | Maintenance mode | server2 |
+    +------------------+---------+
+
+Example with custom time field::
+
+    os> source=events | stats latest(status, event_time) by category | sort category;
+    fetched rows / total rows = 2/2
+    +----------------------------+----------+
+    | latest(status, event_time) | category |
+    |----------------------------+----------|
+    | cancelled                  | orders   |
+    | inactive                   | users    |
+    +----------------------------+----------+
+
+TAKE
+----
+
+Description
+>>>>>>>>>>>
+
+Usage: TAKE(field [, size]). Returns original values of a field. It does not guarantee the order of values.
+
+* field: mandatory. The field must be a text field.
+* size: optional integer. The number of values to be returned. Default is 10.
+
+Example::
+
+    os> source=accounts | stats take(firstname);
+    fetched rows / total rows = 1/1
+    +-----------------------------+
+    | take(firstname)             |
+    |-----------------------------|
+    | [Amber,Hattie,Nanette,Dale] |
+    +-----------------------------+
+
+PERCENTILE or PERCENTILE_APPROX
+-------------------------------
+
+Description
+>>>>>>>>>>>
+
+Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Returns the approximate percentile value of expr at the specified percentage.
+
+* percent: The number must be a constant between 0 and 100.
+
+Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref `issue link `_.
+
+Example::
+
+    os> source=accounts | stats percentile(age, 90) by gender;
+    fetched rows / total rows = 2/2
+    +---------------------+--------+
+    | percentile(age, 90) | gender |
+    |---------------------+--------|
+    | 28                  | F      |
+    | 36                  | M      |
+    +---------------------+--------+
+
+Percentile Shortcut Functions
+>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+
+For convenience, OpenSearch PPL provides shortcut functions for common percentiles:
+
+- ``PERC<N>(expr)`` - Equivalent to ``PERCENTILE(expr, <N>)``
+- ``P<N>(expr)`` - Equivalent to ``PERCENTILE(expr, <N>)``
+
+Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``).
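+
+For instance, ``P90(age)`` and ``PERC90(age)`` are both shorthand for ``PERCENTILE(age, 90)``, so the following two queries should be equivalent (a sketch only; output omitted)::
+
+    source=accounts | stats p90(age)
+    source=accounts | stats percentile(age, 90)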
+
+Example::
+
+    ppl> source=accounts | stats perc99.5(age);
+    fetched rows / total rows = 1/1
+    +---------------+
+    | perc99.5(age) |
+    |---------------|
+    | 36            |
+    +---------------+
+
+    ppl> source=accounts | stats p50(age);
+    fetched rows / total rows = 1/1
+    +----------+
+    | p50(age) |
+    |----------|
+    | 32       |
+    +----------+
+
+MEDIAN
+------
+
+Description
+>>>>>>>>>>>
+
+Usage: MEDIAN(expr). Returns the median (50th percentile) value of ``expr``. This is equivalent to ``PERCENTILE(expr, 50)``.
+
+Example::
+
+    os> source=accounts | stats median(age);
+    fetched rows / total rows = 1/1
+    +-------------+
+    | median(age) |
+    |-------------|
+    | 33          |
+    +-------------+
+
+FIRST
+-----
+
+Description
+>>>>>>>>>>>
+
+Usage: FIRST(field). Returns the first non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field.
+
+* field: mandatory. The field to return the first value for.
+
+Example::
+
+    os> source=accounts | stats first(firstname) by gender;
+    fetched rows / total rows = 2/2
+    +------------------+--------+
+    | first(firstname) | gender |
+    |------------------+--------|
+    | Nanette          | F      |
+    | Amber            | M      |
+    +------------------+--------+
+
+LAST
+----
+
+Description
+>>>>>>>>>>>
+
+Usage: LAST(field). Returns the last non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field.
+
+* field: mandatory. The field to return the last value for.
+
+Example::
+
+    os> source=accounts | stats last(firstname) by gender;
+    fetched rows / total rows = 2/2
+    +-----------------+--------+
+    | last(firstname) | gender |
+    |-----------------+--------|
+    | Nanette         | F      |
+    | Dale            | M      |
+    +-----------------+--------+
+
+LIST
+----
+
+Description
+>>>>>>>>>>>
+
+Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
+The function returns up to 100 values with no guaranteed ordering.
+
+* expr: The field expression to collect values from.
+* This aggregation function doesn't support Array, Struct, or Object field types.
+
+Example with string fields::
+
+    PPL> source=accounts | stats list(firstname);
+    fetched rows / total rows = 1/1
+    +-------------------------------------+
+    | list(firstname)                     |
+    |-------------------------------------|
+    | ["Amber","Hattie","Nanette","Dale"] |
+    +-------------------------------------+
+
+VALUES
+------
+
+Description
+>>>>>>>>>>>
+
+Usage: VALUES(expr). Collects all unique values from the specified expression into a sorted array. Values are converted to strings, nulls are filtered, and duplicates are removed.
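+
+To contrast ``VALUES`` with ``LIST`` side by side, a sketch such as the following can be used (illustrative only; ``LIST`` preserves duplicates with no guaranteed ordering, while ``VALUES`` deduplicates and sorts)::
+
+    source=accounts | stats list(state) as states, values(state) as unique_states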
+ +The maximum number of unique values returned is controlled by the ``plugins.ppl.values.max.limit`` setting: + +* Default value is 0, which means unlimited values are returned +* Can be configured to any positive integer to limit the number of unique values +* See the `PPL Settings <../admin/settings.rst#plugins-ppl-values-max-limit>`_ documentation for more details + +Example with string fields:: + + PPL> source=accounts | stats values(firstname); + fetched rows / total rows = 1/1 + +-------------------------------------+ + | values(firstname) | + |-------------------------------------| + | ["Amber","Dale","Hattie","Nanette"] | + +-------------------------------------+ \ No newline at end of file diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst index 8082d961ab5..0364fa7b116 100644 --- a/docs/user/ppl/functions/condition.rst +++ b/docs/user/ppl/functions/condition.rst @@ -120,7 +120,7 @@ Using with if() for validation messages:: EXISTS ------ -`Because OpenSearch doesn't differentiate null and missing `_. so we can't provide function like ismissing/isnotmissing to test field exist or not. But you can still use isnull/isnotnull for such purpose. +`Since OpenSearch doesn't differentiate null and missing `_, we can't provide functions like ismissing/isnotmissing to test if a field exists or not. But you can still use isnull/isnotnull for such purpose. Example, the account 13 doesn't have email field:: @@ -138,9 +138,9 @@ IFNULL Description >>>>>>>>>>> -Usage: ifnull(field1, field2) return field2 if field1 is null. +Usage: ifnull(field1, field2) returns field2 if field1 is null. -Argument type: all the supported data type, (NOTE : if two parameters has different type, you will fail semantic check.) +Argument type: all the supported data types (NOTE : if two parameters have different types, you will fail semantic check). Return type: any @@ -183,9 +183,9 @@ NULLIF Description >>>>>>>>>>> -Usage: nullif(field1, field2) return null if two parameters are same, otherwise return field1. +Usage: nullif(field1, field2) returns null if two parameters are same, otherwise returns field1. -Argument type: all the supported data type, (NOTE : if two parameters has different type, if two parameters has different type, you will fail semantic check) +Argument type: all the supported data types (NOTE : if two parameters have different types, you will fail semantic check). Return type: any @@ -208,9 +208,9 @@ IF Description >>>>>>>>>>> -Usage: if(condition, expr1, expr2) return expr1 if condition is true, otherwise return expr2. +Usage: if(condition, expr1, expr2) returns expr1 if condition is true, otherwise returns expr2. -Argument type: all the supported data type, (NOTE : if expr1 and expr2 are different type, you will fail semantic check +Argument type: all the supported data types (NOTE : if expr1 and expr2 are different types, you will fail semantic check). Return type: any @@ -255,9 +255,9 @@ CASE Description >>>>>>>>>>> -Usage: case(condition1, expr1, condition2, expr2, ... conditionN, exprN else default) return expr1 if condition1 is true, or return expr2 if condition2 is true, ... if no condition is true, then return the value of ELSE clause. If the ELSE clause is not defined, it returns NULL. +Usage: case(condition1, expr1, condition2, expr2, ... conditionN, exprN else default) returns expr1 if condition1 is true, or returns expr2 if condition2 is true, ... if no condition is true, then returns the value of ELSE clause. 
If the ELSE clause is not defined, returns NULL. -Argument type: all the supported data type, (NOTE : there is no comma before "else") +Argument type: all the supported data types (NOTE : there is no comma before "else"). Return type: any @@ -308,11 +308,9 @@ COALESCE Description >>>>>>>>>>> -Version: 3.1.0 +Usage: coalesce(field1, field2, ...) returns the first non-null, non-missing value in the argument list. -Usage: coalesce(field1, field2, ...) return the first non-null, non-missing value in the argument list. - -Argument type: all the supported data type. Supports mixed data types with automatic type coercion. +Argument type: all the supported data types. Supports mixed data types with automatic type coercion. Return type: determined by the least restrictive common type among all arguments, with fallback to string if no common type can be determined @@ -406,11 +404,9 @@ ISPRESENT Description >>>>>>>>>>> -Version: 3.1.0 - -Usage: ispresent(field) return true if the field exists. +Usage: ispresent(field) returns true if the field exists. -Argument type: all the supported data type. +Argument type: all the supported data types. Return type: BOOLEAN @@ -434,11 +430,9 @@ ISBLANK Description >>>>>>>>>>> -Version: 3.1.0 - Usage: isblank(field) returns true if the field is null, an empty string, or contains only white space. -Argument type: all the supported data type. +Argument type: all the supported data types. Return type: BOOLEAN @@ -462,11 +456,9 @@ ISEMPTY Description >>>>>>>>>>> -Version: 3.1.0 - Usage: isempty(field) returns true if the field is null or is an empty string. -Argument type: all the supported data type. +Argument type: all the supported data types. Return type: BOOLEAN @@ -489,9 +481,7 @@ EARLIEST Description >>>>>>>>>>> -Version: 3.1.0 - -Usage: earliest(relative_string, field) returns true if the value of field is after the timestamp derived from relative_string relative to the current time. Otherwise, return false. +Usage: earliest(relative_string, field) returns true if the value of field is after the timestamp derived from relative_string relative to the current time. Otherwise, returns false. relative_string: The relative string can be one of the following formats: @@ -545,9 +535,7 @@ LATEST Description >>>>>>>>>>> -Version: 3.1.0 - -Usage: latest(relative_string, field) returns true if the value of field is before the timestamp derived from relative_string relative to the current time. Otherwise, return false. +Usage: latest(relative_string, field) returns true if the value of field is before the timestamp derived from relative_string relative to the current time. Otherwise, returns false. Argument type: relative_string:STRING, field: TIMESTAMP @@ -577,8 +565,6 @@ REGEXP_MATCH Description >>>>>>>>>>> -Version: 3.3.0 - Usage: regexp_match(string, pattern) returns true if the regular expression pattern finds a match against any substring of the string value, otherwise returns false. The function uses Java regular expression syntax for the pattern. diff --git a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst index 26db6a6da93..61377847e04 100644 --- a/docs/user/ppl/functions/json.rst +++ b/docs/user/ppl/functions/json.rst @@ -39,10 +39,6 @@ JSON Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json(value)` Evaluates whether a string can be parsed as a json-encoded string. Returns the value if valid, null otherwise. 
Argument type: STRING @@ -94,10 +90,6 @@ JSON_OBJECT Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_object(key1, value1, key2, value2...)` create a json object string with key value pairs. The key must be string. Argument type: key1: STRING, value1: ANY, key2: STRING, value2: ANY ... @@ -120,10 +112,6 @@ JSON_ARRAY Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_array(element1, element2, ...)` create a json array string with elements. Argument type: element1: ANY, element2: ANY ... @@ -146,10 +134,6 @@ JSON_ARRAY_LENGTH Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_array_length(value)` parse the string to json array and return size,, null is returned in case of any other valid JSON string, null or an invalid JSON. Argument type: value: A JSON STRING @@ -180,10 +164,6 @@ JSON_EXTRACT Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_extract(json_string, path1, path2, ...)` Extracts values using the specified JSON paths. If only one path is provided, it returns a single value. If multiple paths are provided, it returns a JSON Array in the order of the paths. If one path cannot find value, return null as the result for this path. The path use "{}" to represent index for array, "{}" means "{*}". Argument type: json_string: STRING, path1: STRING, path2: STRING ... @@ -214,10 +194,6 @@ JSON_DELETE Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_delete(json_string, path1, path2, ...)` Delete values using the specified JSON paths. Return the json string after deleting. If one path cannot find value, do nothing. Argument type: json_string: STRING, path1: STRING, path2: STRING ... @@ -256,10 +232,6 @@ JSON_SET Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_set(json_string, path1, value1, path2, value2...)` Set values to corresponding paths using the specified JSON paths. If one path's parent node is not a json object, skip the path. Return the json string after setting. Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ... @@ -290,10 +262,6 @@ JSON_APPEND Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_append(json_string, path1, value1, path2, value2...)` Append values to corresponding paths using the specified JSON paths. If one path's target node is not an array, skip the path. Return the json string after setting. Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ... @@ -332,10 +300,6 @@ JSON_EXTEND Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_extend(json_string, path1, value1, path2, value2...)` Extend values to corresponding paths using the specified JSON paths. If one path's target node is not an array, skip the path. The function will try to parse the value as an array. If it can be parsed, extend it to the target array. Otherwise, regard the value a single one. Return the json string after setting. Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ... 
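+
+As a minimal sketch of ``json_extend`` based on the description above (illustrative only; the literal JSON, the path ``a``, and the field name ``doc`` are hypothetical)::
+
+    source=t | eval doc = json_extend('{"a": [1, 2]}', 'a', '[3, 4]')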
@@ -374,10 +338,6 @@ JSON_KEYS Description >>>>>>>>>>> -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - Usage: `json_keys(json_string)` Return the key list of the Json object as a Json array. Otherwise, return null. Argument type: json_string: A JSON STRING diff --git a/docs/user/ppl/functions/statistical.rst b/docs/user/ppl/functions/statistical.rst index f87cc104872..3729c1991ca 100644 --- a/docs/user/ppl/functions/statistical.rst +++ b/docs/user/ppl/functions/statistical.rst @@ -17,7 +17,7 @@ Description Usage: max(x, y, ...) returns the maximum value from all provided arguments. Strings are treated as greater than numbers, so if provided both strings and numbers, it will return the maximum string value (lexicographically ordered) -Note: This function is only available in the eval command context and requires Calcite engine to be enabled. +Note: This function is only available in the eval command context. Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments @@ -67,7 +67,7 @@ Description Usage: min(x, y, ...) returns the minimum value from all provided arguments. Strings are treated as greater than numbers, so if provided both strings and numbers, it will return the minimum numeric value. -Note: This function is only available in the eval command context and requires Calcite engine to be enabled. +Note: This function is only available in the eval command context. Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 04a31827579..981b2de3169 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -48,115 +48,85 @@ The query start with search command and then flowing a set of command delimited * **Commands** - - `Syntax `_ + The following commands are available in PPL: + + **Note:** Experimental commands are ready for use, but specific parameters may change based on feedback. + + ============================================================== ================== ======================== ============================================================================================== + Command Name Version Introduced Current Status Command Description + ============================================================== ================== ======================== ============================================================================================== + `search command `_ 1.0 stable (since 1.0) Retrieve documents from the index. + `where command `_ 1.0 stable (since 1.0) Filter the search result using boolean expressions. + `subquery command `_ 3.0 experimental (since 3.0) Embed one PPL query inside another for complex filtering and data retrieval operations. + `fields command `_ 1.0 stable (since 1.0) Keep or remove fields from the search result. + `rename command `_ 1.0 stable (since 1.0) Rename one or more fields in the search result. + `eval command `_ 1.0 stable (since 1.0) Evaluate an expression and append the result to the search result. + `replace command `_ 3.4 experimental (since 3.4) Replace text in one or more fields in the search result + `fillnull command `_ 3.0 experimental (since 3.0) Fill null with provided value in one or more fields in the search result. + `expand command `_ 3.1 experimental (since 3.1) Transform a single document into multiple documents by expanding a nested array field. + `flatten command `_ 3.1 experimental (since 3.1) Flatten a struct or an object field into separate fields in a document. 
+ `table command `_ 3.3 experimental (since 3.3) Keep or remove fields from the search result using enhanced syntax options. + `stats command `_ 1.0 stable (since 1.0) Calculate aggregation from search results. + `eventstats command `_ 3.1 experimental (since 3.1) Calculate aggregation statistics and add them as new fields to each event. + `streamstats command `_ 3.4 experimental (since 3.4) Calculate cumulative or rolling statistics as events are processed in order. + `bin command `_ 3.3 experimental (since 3.3) Group numeric values into buckets of equal intervals. + `timechart command `_ 3.3 experimental (since 3.3) Create time-based charts and visualizations. + `chart command `_ 3.4 experimental (since 3.4) Apply statistical aggregations to search results and group the data for visualizations. + `trendline command `_ 3.0 experimental (since 3.0) Calculate moving averages of fields. + `sort command `_ 1.0 stable (since 1.0) Sort all the search results by the specified fields. + `reverse command `_ 3.2 experimental (since 3.2) Reverse the display order of search results. + `head command `_ 1.0 stable (since 1.0) Return the first N number of specified results after an optional offset in search order. + `dedup command `_ 1.0 stable (since 1.0) Remove identical documents defined by the field from the search result. + `top command `_ 1.0 stable (since 1.0) Find the most common tuple of values of all fields in the field list. + `rare command `_ 1.0 stable (since 1.0) Find the least common tuple of values of all fields in the field list. + `parse command `_ 1.3 stable (since 1.3) Parse a text field with a regular expression and append the result to the search result. + `grok command `_ 2.4 stable (since 2.4) Parse a text field with a grok pattern and append the results to the search result. + `rex command `_ 3.3 experimental (since 3.3) Extract fields from a raw text field using regular expression named capture groups. + `regex command `_ 3.3 experimental (since 3.3) Filter search results by matching field values against a regular expression pattern. + `spath command `_ 3.3 experimental (since 3.3) Extract fields from structured text data. + `patterns command `_ 2.4 stable (since 2.4) Extract log patterns from a text field and append the results to the search result. + `join command `_ 3.0 stable (since 3.0) Combine two datasets together. + `append command `_ 3.3 experimental (since 3.3) Append the result of a sub-search to the bottom of the input search results. + `appendcol command `_ 3.1 experimental (since 3.1) Append the result of a sub-search and attach it alongside the input search results. + `lookup command `_ 3.0 experimental (since 3.0) Add or replace data from a lookup index. + `multisearch command `_ 3.4 experimental (since 3.4) Execute multiple search queries and combine their results. + `ml command `_: 2.5 stable (since 2.5) Apply machine learning algorithms to analyze data. + `kmeans command `_ 1.3 stable (since 1.3) Apply the kmeans algorithm on the search result returned by a PPL command. + `ad command `_ 1.3 deprecated (since 2.5) Apply Random Cut Forest algorithm on the search result returned by a PPL command. + `describe command `_ 2.1 stable (since 2.1) Query the metadata of an index. + `explain command `_ 3.1 stable (since 3.1) Explain the plan of query. + `show datasources command `_ 2.4 stable (since 2.4) Query datasources configured in the PPL engine. 
+ ============================================================== ================== ======================== ============================================================================================== + + - `Syntax `_ - PPL query structure and command syntax formatting - - `ad command `_ - - - `append command `_ - - - `appendcol command `_ - - - `bin command `_ - - - `dedup command `_ - - - `describe command `_ - - - `eval command `_ - - - `eventstats command `_ - - - `expand command `_ - - - `explain command `_ - - - `fields command `_ - - - `fillnull command `_ - - - `flatten command `_ - - - `grok command `_ - - - `head command `_ - - - `join command `_ - - - `kmeans command `_ - - - `lookup command `_ - - - `ml command `_ - - - `multisearch command `_ - - - `parse command `_ - - - `patterns command `_ - - - `rare command `_ - - - `rename command `_ - - - `regex command `_ - - - `rex command `_ - - - `search command `_ - - - `show datasources command `_ - - - `sort command `_ - - - `spath command `_ - - - `stats command `_ - - - `streamstats command `_ - - - `subquery (aka subsearch) command `_ - - - `reverse command `_ - - - `table command `_ - - - `chart command `_ - - - `timechart command `_ +* **Functions** - - `top command `_ + - `Aggregation Functions `_ - - `trendline command `_ + - `Collection Functions `_ - - `replace command `_ + - `Condition Functions `_ - - `where command `_ + - `Cryptographic Functions `_ -* **Functions** + - `Date and Time Functions `_ - `Expressions `_ - - `Math Functions `_ - - - `Date and Time Functions `_ + - `IP Address Functions `_ - - `String Functions `_ + - `JSON Functions `_ - - `Condition Functions `_ + - `Math Functions `_ - `Relevance Functions `_ - - `Type Conversion Functions `_ + - `String Functions `_ - `System Functions `_ - - `IP Address Functions `_ - - - `Collection Functions `_ - - - `Cryptographic Functions `_ - - - `JSON Functions `_ + - `Type Conversion Functions `_ * **Optimization** From 780e6c36c82f5a55b82f626fb290b2315ef7c742 Mon Sep 17 00:00:00 2001 From: Xinyu Hao <75524174+ishaoxy@users.noreply.github.com> Date: Thu, 20 Nov 2025 01:38:40 +0800 Subject: [PATCH 67/99] add bucket_nullable for eventstats (#4817) --- .../org/opensearch/sql/ast/tree/Window.java | 2 + .../sql/calcite/CalciteRelNodeVisitor.java | 25 ++++++++- docs/user/ppl/cmd/eventstats.rst | 36 ++++++++++++- .../sql/calcite/remote/CalciteExplainIT.java | 10 ++++ .../remote/CalcitePPLEventstatsIT.java | 54 +++++++++++++++++++ .../explain_eventstats_null_bucket.yaml | 11 ++++ .../explain_eventstats_null_bucket.yaml | 11 ++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 16 ++++-- .../sql/ppl/utils/ArgumentFactory.java | 17 ++++++ .../ppl/calcite/CalcitePPLEventstatsTest.java | 21 ++++++++ 11 files changed, 199 insertions(+), 6 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eventstats_null_bucket.yaml diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Window.java b/core/src/main/java/org/opensearch/sql/ast/tree/Window.java index 818e78120ec..fbdf8e163a7 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Window.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Window.java @@ -21,6 +21,8 @@ public class Window extends UnresolvedPlan { private final List windowFunctionList; + private final List groupList; + private final boolean 
bucketNullable;
 
   @ToString.Exclude private UnresolvedPlan child;
 
   @Override
diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
index 4848415c360..158c25688f7 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
@@ -1614,9 +1614,32 @@ private static void buildDedupNotNull(
   @Override
   public RelNode visitWindow(Window node, CalcitePlanContext context) {
     visitChildren(node, context);
+
+    List<UnresolvedExpression> groupList = node.getGroupList();
+    boolean hasGroup = groupList != null && !groupList.isEmpty();
+    boolean bucketNullable = node.isBucketNullable();
+
     List<RexNode> overExpressions =
         node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList();
-    context.relBuilder.projectPlus(overExpressions);
+
+    if (hasGroup && !bucketNullable) {
+      // construct groupNotNull predicate
+      List<RexNode> groupByList =
+          groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList();
+      List<RexNode> notNullList =
+          PlanUtils.getSelectColumns(groupByList).stream()
+              .map(context.relBuilder::field)
+              .map(context.relBuilder::isNotNull)
+              .toList();
+      RexNode groupNotNull = context.relBuilder.and(notNullList);
+
+      // wrap each expr: CASE WHEN groupNotNull THEN rawExpr ELSE CAST(NULL AS rawType) END
+      List<RexNode> wrappedOverExprs =
+          wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context);
+      context.relBuilder.projectPlus(wrappedOverExprs);
+    } else {
+      context.relBuilder.projectPlus(overExpressions);
+    }
     return context.relBuilder.peek();
   }
diff --git a/docs/user/ppl/cmd/eventstats.rst b/docs/user/ppl/cmd/eventstats.rst
index 755af0486e4..cf4ac0d9b02 100644
--- a/docs/user/ppl/cmd/eventstats.rst
+++ b/docs/user/ppl/cmd/eventstats.rst
@@ -40,9 +40,14 @@ The ``stats`` and ``eventstats`` commands are both used for calculating statisti
 Syntax
 ======
-eventstats ... [by-clause]
+eventstats [bucket_nullable=bool] ... [by-clause]
 
 * function: mandatory. An aggregation function or window function.
+* bucket_nullable: optional. Controls whether the eventstats command considers null buckets as a valid group in group-by aggregations. When set to ``false``, it will not treat null group-by values as a distinct group during aggregation. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``.
+
+  * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true``
+  * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false``
+
 * by-clause: optional. Groups results by specified fields or expressions. Syntax: by [span-expression,] [field,]... **Default:** aggregation over the entire result set.
 * span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). For example, ``span(age, 10)`` creates 10-year age buckets, ``span(timestamp, 1h)`` creates hourly buckets.
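+
+As an illustrative sketch (not one of the doctest examples below), ``bucket_nullable`` can be combined with a span expression; the query shape would be::
+
+    source=accounts | eventstats bucket_nullable=false avg(balance) as avg_bal by span(age, 10), gender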
@@ -126,3 +131,32 @@ PPL query:: | 13 | F | 28 | 1 | | 18 | M | 33 | 2 | +----------------+--------+-----+-----+ + +Example 3: Null buckets handling +================================ + +PPL query:: + + os> source=accounts | eventstats bucket_nullable=false count() as cnt by employer | fields account_number, firstname, employer, cnt | sort account_number; + fetched rows / total rows = 4/4 + +----------------+-----------+----------+------+ + | account_number | firstname | employer | cnt | + |----------------+-----------+----------+------| + | 1 | Amber | Pyrami | 1 | + | 6 | Hattie | Netagy | 1 | + | 13 | Nanette | Quility | 1 | + | 18 | Dale | null | null | + +----------------+-----------+----------+------+ + +PPL query:: + + os> source=accounts | eventstats bucket_nullable=true count() as cnt by employer | fields account_number, firstname, employer, cnt | sort account_number; + fetched rows / total rows = 4/4 + +----------------+-----------+----------+-----+ + | account_number | firstname | employer | cnt | + |----------------+-----------+----------+-----| + | 1 | Amber | Pyrami | 1 | + | 6 | Hattie | Netagy | 1 | + | 13 | Nanette | Quility | 1 | + | 18 | Dale | null | 1 | + +----------------+-----------+----------+-----+ diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c981dfee8cb..c2dce34fc38 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -618,6 +618,16 @@ public void testEventstatsDistinctCountFunctionExplain() throws IOException { assertJsonEqualsIgnoreId(expected, result); } + @Test + public void testEventstatsNullBucketExplain() throws IOException { + String query = + "source=opensearch-sql_test_index_account | eventstats bucket_nullable=false count() by" + + " state"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_eventstats_null_bucket.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + @Test public void testStreamstatsDistinctCountExplain() throws IOException { String query = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java index 9839fff00c4..f1ee8df35ea 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEventstatsIT.java @@ -165,6 +165,40 @@ public void testEventstatsByWithNull() throws IOException { rows("Hello", "USA", "New York", 4, 2023, 30, 1, 30, 30, 30)); } + @Test + public void testEventstatsByWithNullBucket() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eventstats bucket_nullable=false count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual, + rows("Kevin", null, null, 4, 2023, null, null, null, null, null), + rows(null, "Canada", null, 4, 2023, 10, 3, 18.333333333333332, 10, 25), + rows("John", "Canada", "Ontario", 4, 2023, 25, 3, 18.333333333333332, 10, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 3, 18.333333333333332, 10, 25), + rows("Jake", "USA", "California", 4, 2023, 70, 2, 50, 30, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 2, 50, 
30, 70)); + + actual = + executeQuery( + String.format( + "source=%s | eventstats bucket_nullable=false count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by state", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + verifyDataRows( + actual, + rows(null, "Canada", null, 4, 2023, 10, null, null, null, null), + rows("Kevin", null, null, 4, 2023, null, null, null, null, null), + rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 1, 20, 20, 20), + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 1, 30, 30, 30)); + } + @Test public void testEventstatsBySpan() throws IOException { JSONObject actual = @@ -324,6 +358,26 @@ public void testMultipleEventstatsWithNull() throws IOException { rows("Hello", "USA", "New York", 4, 2023, 30, 30.0, 50.0)); } + @Test + public void testMultipleEventstatsWithNullBucket() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eventstats bucket_nullable=false avg(age) as avg_age by state, country" + + " | eventstats bucket_nullable=false avg(avg_age) as avg_state_age by" + + " country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual, + rows("Kevin", null, null, 4, 2023, null, null, null), + rows(null, "Canada", null, 4, 2023, 10, null, 22.5), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20.0, 22.5), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25.0, 22.5), + rows("Jake", "USA", "California", 4, 2023, 70, 70.0, 50.0), + rows("Hello", "USA", "New York", 4, 2023, 30, 30.0, 50.0)); + } + @Test public void testMultipleEventstatsWithEval() throws IOException { JSONObject actual = diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml new file mode 100644 index 00000000000..ae969892eeb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_eventstats_null_bucket.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], count()=[CASE(IS NOT NULL($7), COUNT() OVER (PARTITION BY $7), null:BIGINT)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], expr#14=[CASE($t11, $t12, $t13)], proj#0..10=[{exprs}], count()=[$t14]) + EnumerableWindow(window#0=[window(partition {7} aggs [COUNT()])]) + EnumerableCalc(expr#0..10=[{inputs}], expr#11=[IS NOT NULL($t7)], proj#0..11=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eventstats_null_bucket.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eventstats_null_bucket.yaml
new file mode 100644
index 00000000000..ad8f22e9421
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_eventstats_null_bucket.yaml
@@ -0,0 +1,11 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], count()=[CASE(IS NOT NULL($7), COUNT() OVER (PARTITION BY $7), null:BIGINT)])
+        CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], expr#14=[CASE($t11, $t12, $t13)], proj#0..10=[{exprs}], count()=[$t14])
+        EnumerableWindow(window#0=[window(partition {7} aggs [COUNT()])])
+          EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t7)], proj#0..10=[{exprs}], $11=[$t17])
+            CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
\ No newline at end of file
diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4
index 8988ddf1491..caf636942fd 100644
--- a/ppl/src/main/antlr/OpenSearchPPLParser.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4
@@ -255,7 +255,7 @@ dedupSplitArg
   ;

 eventstatsCommand
-  : EVENTSTATS eventstatsAggTerm (COMMA eventstatsAggTerm)* (statsByClause)?
+  : EVENTSTATS (bucketNullableArg)? eventstatsAggTerm (COMMA eventstatsAggTerm)* (statsByClause)?
   ;

 streamstatsCommand
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
index ed66682a981..3ffff5f9442 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
@@ -481,14 +481,24 @@ public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) {

   /** Eventstats command. */
   public UnresolvedPlan visitEventstatsCommand(OpenSearchPPLParser.EventstatsCommandContext ctx) {
+    // 1. Parse arguments from the eventstats command
+    List<Argument> argExprList = ArgumentFactory.getArgumentList(ctx, settings);
+    ArgumentMap arguments = ArgumentMap.of(argExprList);
+
+    // bucket_nullable
+    boolean bucketNullable =
+        (Boolean) arguments.getOrDefault(Argument.BUCKET_NULLABLE, Literal.TRUE).getValue();
+
+    // 2. Build groupList
+    List<UnresolvedExpression> groupList = getPartitionExprList(ctx.statsByClause());
+
     ImmutableList.Builder<UnresolvedExpression> windownFunctionListBuilder =
         new ImmutableList.Builder<>();
     for (OpenSearchPPLParser.EventstatsAggTermContext aggCtx : ctx.eventstatsAggTerm()) {
       UnresolvedExpression windowFunction = internalVisitExpression(aggCtx.windowFunction());
       // set partition by list for window function
       if (windowFunction instanceof WindowFunction) {
-        ((WindowFunction) windowFunction)
-            .setPartitionByList(getPartitionExprList(ctx.statsByClause()));
+        ((WindowFunction) windowFunction).setPartitionByList(groupList);
       }
       String name =
           aggCtx.alias == null
@@ -498,7 +508,7 @@ public UnresolvedPlan visitEventstatsComma
       windownFunctionListBuilder.add(alias);
     }

-    return new Window(windownFunctionListBuilder.build());
+    return new Window(windownFunctionListBuilder.build(), groupList, bucketNullable);
   }

   /** Streamstats command. */
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
index 41e9e91535b..9fab9ba9a0f 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
@@ -26,6 +26,7 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DecimalLiteralContext;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DefaultSortFieldContext;
+import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EventstatsCommandContext;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.IntegerLiteralContext;
 import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.PrefixSortFieldContext;
@@ -111,6 +112,22 @@ public static List<Argument> getArgumentList(StreamstatsCommandContext ctx) {
         : new Argument("global", new Literal(true, DataType.BOOLEAN)));
   }

+  /**
+   * Get list of {@link Argument}.
+   *
+   * @param ctx EventstatsCommandContext instance
+   * @return the list of arguments fetched from the eventstats command
+   */
+  public static List<Argument> getArgumentList(EventstatsCommandContext ctx, Settings settings) {
+    return Collections.singletonList(
+        ctx.bucketNullableArg() != null && !ctx.bucketNullableArg().isEmpty()
+            ? new Argument(
+                Argument.BUCKET_NULLABLE, getArgumentValue(ctx.bucketNullableArg().bucket_nullable))
+            : new Argument(
+                Argument.BUCKET_NULLABLE,
+                legacyPreferred(settings) ? Literal.TRUE : Literal.FALSE));
+  }
+
   /**
    * Get list of {@link Argument}.
    *
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsTest.java
index cd808621407..24f489a739f 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLEventstatsTest.java
@@ -70,4 +70,25 @@ public void testEventstatsAvg() {
         + "FROM `scott`.`EMP`";
     verifyPPLToSparkSQL(root, expectedSparkSql);
   }
+
+  @Test
+  public void testEventstatsNullBucket() {
+    String ppl = "source=EMP | eventstats bucket_nullable=false avg(SAL) by DEPTNO";
+    RelNode root = getRelNode(ppl);
+    String expectedLogical =
+        "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
+            + " COMM=[$6], DEPTNO=[$7], avg(SAL)=[CASE(IS NOT NULL($7), /(SUM($5) OVER (PARTITION"
+            + " BY $7), CAST(COUNT($5) OVER (PARTITION BY $7)):DOUBLE NOT NULL), null:DOUBLE)])\n"
+            + "  LogicalTableScan(table=[[scott, EMP]])\n";
+    verifyLogical(root, expectedLogical);
+
+    String expectedSparkSql =
+        "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, CASE WHEN"
+            + " `DEPTNO` IS NOT NULL THEN (SUM(`SAL`) OVER (PARTITION BY `DEPTNO` RANGE BETWEEN"
+            + " UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) / CAST(COUNT(`SAL`) OVER (PARTITION"
+            + " BY `DEPTNO` RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS DOUBLE)"
+            + " ELSE NULL END `avg(SAL)`\n"
+            + "FROM `scott`.`EMP`";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
 }

From f0cba5acb618528c185ab07ce5140689356d8b50 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Sat, 6 Dec 2025 14:57:47 -0800
Subject: [PATCH 68/99] added missing expected output for explain test

Signed-off-by: Asif Bashar
---
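A side note on the default handling ArgumentFactory gained in the previous patch: an explicit bucket_nullable=... on the command always wins, and only in its absence does the default track the legacy-syntax preference. A standalone sketch of that resolution, assuming a hypothetical legacyPreferred flag standing in for reading plugins.ppl.syntax.legacy.preferred from Settings:

public class BucketNullableDefault {
  // explicitArg: the parsed bucket_nullable=... literal, or null when the command omits it.
  static boolean resolveBucketNullable(Boolean explicitArg, boolean legacyPreferred) {
    return explicitArg != null ? explicitArg : legacyPreferred;
  }

  public static void main(String[] args) {
    System.out.println(resolveBucketNullable(null, true));  // true: legacy default
    System.out.println(resolveBucketNullable(null, false)); // false: new default
    System.out.println(resolveBucketNullable(false, true)); // false: explicit wins
  }
}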
.../calcite/explain_add_col_totals.yaml | 18 +++++++++++++++ .../calcite/explain_add_totals.yaml | 22 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml new file mode 100644 index 00000000000..0a8139b1eaa --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml @@ -0,0 +1,18 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalUnion(all=[true]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], balance=[$0], gender=[null:VARCHAR], city=[null:VARCHAR], employer=[null:VARCHAR], state=[null:VARCHAR], age=[$1], email=[null:VARCHAR], lastname=[null:VARCHAR], _id=[null:VARCHAR], _index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR]) + LogicalAggregate(group=[{}], balance=[SUM($3)], age=[SUM($8)]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableUnion(all=[true]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], account_number=[$t2], firstname=[$t3], address=[$t3], balance=[$t0], gender=[$t3], city=[$t3], employer=[$t3], state=[$t3], age=[$t1], email=[$t3], lastname=[$t3]) + EnumerableAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml new file mode 100644 index 00000000000..0c8b4ec26a2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml @@ -0,0 +1,22 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], CustomSum=[$17], all_emp_total=[$18]) + LogicalUnion(all=[true]) + 
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], CustomSum=[+($3, $8)], all_emp_total=[null:VARCHAR(13)]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], balance=[$0], gender=[null:VARCHAR], city=[null:VARCHAR], employer=[null:VARCHAR], state=[null:VARCHAR], age=[$1], email=[null:VARCHAR], lastname=[null:VARCHAR], _id=[null:VARCHAR], _index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR], CustomSum=[null:BIGINT], all_emp_total=['ColTotal':VARCHAR(13)]) + LogicalAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) + LogicalProject(balance=[$3], age=[$8]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableUnion(all=[true]) + EnumerableCalc(expr#0..10=[{inputs}], expr#11=[+($t3, $t8)], expr#12=[null:VARCHAR(13)], proj#0..12=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=['ColTotal':VARCHAR(13)], account_number=[$t2], firstname=[$t3], address=[$t3], balance=[$t0], gender=[$t3], city=[$t3], employer=[$t3], state=[$t3], age=[$t1], email=[$t3], lastname=[$t3], CustomSum=[$t2], all_emp_total=[$t4]) + EnumerableAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) From 39cbebe5ec4ff9b84b2ace8573e47076452b27e1 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Sat, 6 Dec 2025 15:02:37 -0800 Subject: [PATCH 69/99] Revert "added missing expected output for explain test" This reverts commit 77ceb6e4a6246428af0f4a416c059c1c67232b6b. 
Signed-off-by: Asif Bashar --- .../calcite/explain_add_col_totals.yaml | 18 --------------- .../calcite/explain_add_totals.yaml | 22 ------------------- 2 files changed, 40 deletions(-) delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml deleted file mode 100644 index 0a8139b1eaa..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_col_totals.yaml +++ /dev/null @@ -1,18 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalUnion(all=[true]) - LogicalSort(fetch=[5]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], balance=[$0], gender=[null:VARCHAR], city=[null:VARCHAR], employer=[null:VARCHAR], state=[null:VARCHAR], age=[$1], email=[null:VARCHAR], lastname=[null:VARCHAR], _id=[null:VARCHAR], _index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR]) - LogicalAggregate(group=[{}], balance=[SUM($3)], age=[SUM($8)]) - LogicalSort(fetch=[5]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableLimit(fetch=[10000]) - EnumerableUnion(all=[true]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], account_number=[$t2], firstname=[$t3], address=[$t3], balance=[$t0], gender=[$t3], city=[$t3], employer=[$t3], state=[$t3], age=[$t1], email=[$t3], lastname=[$t3]) - EnumerableAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml deleted file mode 100644 index 0c8b4ec26a2..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_add_totals.yaml +++ /dev/null @@ -1,22 +0,0 @@ -calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], CustomSum=[$17], 
all_emp_total=[$18]) - LogicalUnion(all=[true]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], CustomSum=[+($3, $8)], all_emp_total=[null:VARCHAR(13)]) - LogicalSort(fetch=[5]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], balance=[$0], gender=[null:VARCHAR], city=[null:VARCHAR], employer=[null:VARCHAR], state=[null:VARCHAR], age=[$1], email=[null:VARCHAR], lastname=[null:VARCHAR], _id=[null:VARCHAR], _index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR], CustomSum=[null:BIGINT], all_emp_total=['ColTotal':VARCHAR(13)]) - LogicalAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) - LogicalProject(balance=[$3], age=[$8]) - LogicalSort(fetch=[5]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - physical: | - EnumerableLimit(fetch=[10000]) - EnumerableUnion(all=[true]) - EnumerableCalc(expr#0..10=[{inputs}], expr#11=[+($t3, $t8)], expr#12=[null:VARCHAR(13)], proj#0..12=[{exprs}]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=['ColTotal':VARCHAR(13)], account_number=[$t2], firstname=[$t3], address=[$t3], balance=[$t0], gender=[$t3], city=[$t3], employer=[$t3], state=[$t3], age=[$t1], email=[$t3], lastname=[$t3], CustomSum=[$t2], all_emp_total=[$t4]) - EnumerableAggregate(group=[{}], balance=[SUM($0)], age=[SUM($1)]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) From 34eeb4a054d5bf8de18db3fa1d4a44f4705a510e Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 20 Nov 2025 14:48:18 +0800 Subject: [PATCH 70/99] [3.4.0] Bump Gradle to 9.2.0 and GitHub Action JDK to 25 (#4824) * [3.4.0] Bump Gradle to 9.2.0 and GitHub Action JDK to 25 Signed-off-by: Lantao Jin * fix UT Signed-off-by: Lantao Jin * fix SpotlessCheck issue Signed-off-by: Lantao Jin --------- Signed-off-by: Lantao Jin --- .../workflows/integ-tests-with-security.yml | 4 +- .github/workflows/sql-pitest.yml | 2 +- .../workflows/sql-test-and-build-workflow.yml | 12 +- .github/workflows/sql-test-workflow.yml | 2 +- api/build.gradle | 2 +- async-query-core/build.gradle | 2 +- async-query/build.gradle | 10 + benchmarks/build.gradle | 2 +- build.gradle | 14 +- common/build.gradle | 2 +- core/build.gradle | 6 +- .../sql/calcite/CalciteRelNodeVisitor.java | 5 +- .../sql/calcite/ExtendedRexBuilder.java | 5 +- .../sql/calcite/type/ExprSqlType.java | 8 +- 
.../calcite/utils/OpenSearchTypeFactory.java | 3 +- .../sql/calcite/utils/PlanUtils.java | 26 +- .../utils/UserDefinedFunctionUtils.java | 4 +- .../time/TimeSpanExpressionFactory.java | 11 +- .../datetime/DateTimeConversionUtils.java | 27 +- .../datetime/DateTimeFunctions.java | 10 +- .../expression/function/PPLTypeChecker.java | 41 +- .../function/udf/CryptographicFunction.java | 14 +- .../expression/function/udf/SpanFunction.java | 16 +- .../condition/EnhancedCoalesceFunction.java | 16 +- .../function/udf/ip/CompareIpFunction.java | 12 +- .../datetime/NowLikeFunctionTest.java | 2 +- datasources/build.gradle | 1 + direct-query-core/build.gradle | 1 + direct-query/build.gradle | 1 + gradle/wrapper/gradle-wrapper.jar | Bin 43583 -> 43764 bytes gradle/wrapper/gradle-wrapper.properties | 4 +- gradlew | 9 +- gradlew.bat | 4 +- integ-test/build.gradle | 2 +- .../sql/security/PPLPermissionsIT.java | 76 +- legacy/build.gradle | 2 +- .../sql/legacy/utils/SQLFunctions.java | 4 +- opensearch/build.gradle | 10 +- .../data/type/OpenSearchDataType.java | 2 +- .../opensearch/request/AggregateAnalyzer.java | 145 +-- .../opensearch/request/CaseRangeAnalyzer.java | 5 +- .../opensearch/request/PredicateAnalyzer.java | 20 +- .../scan/AbstractCalciteIndexScan.java | 36 +- .../aggregation/CalciteAggregationScript.java | 9 +- .../opensearch/util/OpenSearchRelOptUtil.java | 2 +- .../request/CaseRangeAnalyzerTest.java | 33 +- .../request/PredicateAnalyzerTest.java | 839 +++++++++--------- .../script/filter/FilterQueryBuilderTest.java | 3 +- plugin/build.gradle | 4 +- ppl/build.gradle | 2 +- .../sql/ppl/antlr/PPLSyntaxParserTest.java | 30 +- .../ppl/calcite/CalcitePPLInSubqueryTest.java | 12 +- prometheus/build.gradle | 1 + protocol/build.gradle | 3 +- sql/build.gradle | 3 +- 55 files changed, 810 insertions(+), 711 deletions(-) diff --git a/.github/workflows/integ-tests-with-security.yml b/.github/workflows/integ-tests-with-security.yml index 398734a80db..aa9f546fa19 100644 --- a/.github/workflows/integ-tests-with-security.yml +++ b/.github/workflows/integ-tests-with-security.yml @@ -22,7 +22,7 @@ jobs: strategy: fail-fast: false matrix: - java: [21, 24] + java: [21, 25] runs-on: ubuntu-latest container: # using the same image which is used by opensearch-build team to build the OpenSearch Distribution @@ -63,7 +63,7 @@ jobs: fail-fast: false matrix: os: [ windows-latest, macos-14 ] - java: [21, 24] + java: [21, 25] runs-on: ${{ matrix.os }} diff --git a/.github/workflows/sql-pitest.yml b/.github/workflows/sql-pitest.yml index 74cf95a439a..695aa7d285d 100644 --- a/.github/workflows/sql-pitest.yml +++ b/.github/workflows/sql-pitest.yml @@ -20,7 +20,7 @@ jobs: needs: Get-CI-Image-Tag strategy: matrix: - java: [21, 24] + java: [21, 25] runs-on: ubuntu-latest container: # using the same image which is used by opensearch-build team to build the OpenSearch Distribution diff --git a/.github/workflows/sql-test-and-build-workflow.yml b/.github/workflows/sql-test-and-build-workflow.yml index 151cf0f16d4..0e7b4c228d2 100644 --- a/.github/workflows/sql-test-and-build-workflow.yml +++ b/.github/workflows/sql-test-and-build-workflow.yml @@ -29,7 +29,7 @@ jobs: strategy: fail-fast: false matrix: - java: [21, 24] + java: [21, 25] test-type: ['unit', 'integration', 'doc'] runs-on: ubuntu-latest container: @@ -106,16 +106,16 @@ jobs: matrix: entry: - { os: windows-latest, java: 21, os_build_args: -PbuildPlatform=windows } - - { os: windows-latest, java: 24, os_build_args: -PbuildPlatform=windows } + - { os: windows-latest, java: 
25, os_build_args: -PbuildPlatform=windows } - { os: macos-14, java: 21, os_build_args: '' } - - { os: macos-14, java: 24, os_build_args: '' } + - { os: macos-14, java: 25, os_build_args: '' } test-type: ['unit', 'integration', 'doc'] exclude: # Exclude doctest for Windows - test-type: doc entry: { os: windows-latest, java: 21, os_build_args: -PbuildPlatform=windows } - test-type: doc - entry: { os: windows-latest, java: 24, os_build_args: -PbuildPlatform=windows } + entry: { os: windows-latest, java: 25, os_build_args: -PbuildPlatform=windows } runs-on: ${{ matrix.entry.os }} steps: @@ -184,7 +184,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - java: [21, 24] + java: [21, 25] container: image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} @@ -230,7 +230,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - java: [21, 24] + java: [21, 25] container: image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} diff --git a/.github/workflows/sql-test-workflow.yml b/.github/workflows/sql-test-workflow.yml index 096f1e5fcdc..4baf128f004 100644 --- a/.github/workflows/sql-test-workflow.yml +++ b/.github/workflows/sql-test-workflow.yml @@ -20,7 +20,7 @@ jobs: needs: Get-CI-Image-Tag strategy: matrix: - java: [21, 24] + java: [21, 25] runs-on: ubuntu-latest container: # using the same image which is used by opensearch-build team to build the OpenSearch Distribution diff --git a/api/build.gradle b/api/build.gradle index 0b96acabec1..dfd0e25b902 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -28,7 +28,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/async-query-core/build.gradle b/async-query-core/build.gradle index ece332519b7..5e9ce267676 100644 --- a/async-query-core/build.gradle +++ b/async-query-core/build.gradle @@ -80,7 +80,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/async-query/build.gradle b/async-query/build.gradle index a1613b5c419..417d2ab798f 100644 --- a/async-query/build.gradle +++ b/async-query/build.gradle @@ -95,12 +95,22 @@ jacocoTestCoverageVerification { rule { element = 'CLASS' excludes = [ + 'org.opensearch.sql.spark.asyncquery.OpenSearchAsyncQueryJobMetadataStorageService', 'org.opensearch.sql.spark.cluster.ClusterManagerEventListener*', 'org.opensearch.sql.spark.cluster.FlintIndexRetention', + 'org.opensearch.sql.spark.cluster.FlintStreamingJobHouseKeeperTask', 'org.opensearch.sql.spark.cluster.IndexCleanup', + 'org.opensearch.sql.spark.config.OpenSearchSparkSubmitParameterModifier', + 'org.opensearch.sql.spark.execution.session.*', + 'org.opensearch.sql.spark.execution.statement.*', // ignore because XContext IOException 'org.opensearch.sql.spark.execution.statestore.StateStore', + 'org.opensearch.sql.spark.execution.statestore.OpenSearchStatementStorageService', + 'org.opensearch.sql.spark.execution.statestore.OpenSearchSessionStorageService', + 
'org.opensearch.sql.spark.flint.*', + 'org.opensearch.sql.spark.metrics.OpenSearchMetricsService', 'org.opensearch.sql.spark.rest.*', + 'org.opensearch.sql.spark.response.OpenSearchJobExecutionResponseReader', 'org.opensearch.sql.spark.scheduler.parser.OpenSearchScheduleQueryJobRequestParser', 'org.opensearch.sql.spark.transport.model.*' ] diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index 05e35348784..3b59b92f943 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -5,7 +5,7 @@ plugins { id 'java-library' - id "me.champeau.jmh" version "0.6.8" + id "me.champeau.jmh" version "0.7.3" } repositories { diff --git a/build.gradle b/build.gradle index 5c4bc4ee7ba..547c2d01dd5 100644 --- a/build.gradle +++ b/build.gradle @@ -80,7 +80,7 @@ plugins { id 'java-library' id "io.freefair.lombok" version "8.14" id 'jacoco' - id 'com.diffplug.spotless' version '7.2.1' + id 'com.diffplug.spotless' version '8.1.0' } // import versions defined in https://github.com/opensearch-project/OpenSearch/blob/main/buildSrc/src/main/java/org/opensearch/gradle/OpenSearchJavaPlugin.java#L94 @@ -113,7 +113,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } @@ -128,8 +128,16 @@ allprojects { } plugins.withId('java') { - sourceCompatibility = targetCompatibility = JavaVersion.VERSION_21 + java { + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 + } + } + + tasks.withType(Test) { + jvmArgs '-Dnet.bytebuddy.experimental=true' } + configurations.all { resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib:1.9.10" resolutionStrategy.force "net.minidev:json-smart:${versions.json_smart}" diff --git a/common/build.gradle b/common/build.gradle index ac67a003fab..bd7091819c7 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -80,6 +80,6 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/core/build.gradle b/core/build.gradle index e53749d1593..8329a5ff73c 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -26,7 +26,7 @@ plugins { id 'java-library' id "io.freefair.lombok" id 'jacoco' - id 'info.solidsoft.pitest' version '1.9.0' + id 'info.solidsoft.pitest' version '1.19.0-rc.2' id 'java-test-fixtures' id 'com.diffplug.spotless' @@ -38,7 +38,7 @@ repositories { pitest { targetClasses = ['org.opensearch.sql.*'] - pitestVersion = '1.9.0' + pitestVersion = '1.19.0-rc.2' threads = 4 outputFormats = ['HTML', 'XML'] timestampedReports = false @@ -91,7 +91,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 158c25688f7..82651861735 100644 --- 
a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -451,8 +451,9 @@ private List expandProjectFields( .filter(addedFields::add) .forEach(field -> expandedFields.add(context.relBuilder.field(field))); } - default -> throw new IllegalStateException( - "Unexpected expression type in project list: " + expr.getClass().getSimpleName()); + default -> + throw new IllegalStateException( + "Unexpected expression type in project list: " + expr.getClass().getSimpleName()); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java b/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java index c353271d370..9b8ac7dfc97 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java +++ b/core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java @@ -171,8 +171,9 @@ public RexNode makeCast( "Cannot convert %s to IP, only STRING and IP types are supported", argExprType)); } - default -> throw new SemanticCheckException( - String.format(Locale.ROOT, "Cannot cast from %s to %s", argExprType, udt.name())); + default -> + throw new SemanticCheckException( + String.format(Locale.ROOT, "Cannot cast from %s to %s", argExprType, udt.name())); }; } // Use a custom operator when casting floating point or decimal number to a character type. diff --git a/core/src/main/java/org/opensearch/sql/calcite/type/ExprSqlType.java b/core/src/main/java/org/opensearch/sql/calcite/type/ExprSqlType.java index 8d9b90042dc..9670646e2ee 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/type/ExprSqlType.java +++ b/core/src/main/java/org/opensearch/sql/calcite/type/ExprSqlType.java @@ -54,7 +54,8 @@ public Type getJavaType() { INTEGER, INTERVAL_YEAR, INTERVAL_YEAR_MONTH, - INTERVAL_MONTH -> this.isNullable() ? Integer.class : int.class; + INTERVAL_MONTH -> + this.isNullable() ? Integer.class : int.class; case TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE, TIMESTAMP_TZ, @@ -68,13 +69,14 @@ public Type getJavaType() { INTERVAL_HOUR_SECOND, INTERVAL_MINUTE, INTERVAL_MINUTE_SECOND, - INTERVAL_SECOND -> this.isNullable() ? Long.class : long.class; + INTERVAL_SECOND -> + this.isNullable() ? Long.class : long.class; case SMALLINT -> this.isNullable() ? Short.class : short.class; case TINYINT -> this.isNullable() ? Byte.class : byte.class; case DECIMAL -> BigDecimal.class; case BOOLEAN -> this.isNullable() ? Boolean.class : boolean.class; case DOUBLE, FLOAT -> // sic - this.isNullable() ? Double.class : double.class; + this.isNullable() ? Double.class : double.class; case REAL -> this.isNullable() ? 
Float.class : float.class; case BINARY, VARBINARY -> ByteString.class; case GEOMETRY -> Geometry.class; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index d225a797285..c505a431c27 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -240,7 +240,8 @@ public static ExprType convertSqlTypeNameToExprType(SqlTypeName sqlTypeName) { INTERVAL_HOUR_SECOND, INTERVAL_MINUTE, INTERVAL_MINUTE_SECOND, - INTERVAL_SECOND -> INTERVAL; + INTERVAL_SECOND -> + INTERVAL; case ARRAY -> ARRAY; case MAP -> STRUCT; case GEOMETRY -> GEO_POINT; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index c1c572225fe..cd9abcf32ba 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -156,8 +156,8 @@ static RexNode makeOver( RexWindowBound lowerBound = convert(context, windowFrame.getLower()); RexWindowBound upperBound = convert(context, windowFrame.getUpper()); switch (functionName) { - // There is no "avg" AggImplementor in Calcite, we have to change avg window - // function to `sum over(...).toRex / count over(...).toRex` + // There is no "avg" AggImplementor in Calcite, we have to change avg window + // function to `sum over(...).toRex / count over(...).toRex` case AVG: // avg(x) ==> // sum(x) / count(x) @@ -167,17 +167,17 @@ static RexNode makeOver( context.relBuilder.cast( countOver(context, field, partitions, rows, lowerBound, upperBound), SqlTypeName.DOUBLE)); - // stddev_pop(x) ==> - // power((sum(x * x) - sum(x) * sum(x) / count(x)) / count(x), 0.5) - // - // stddev_samp(x) ==> - // power((sum(x * x) - sum(x) * sum(x) / count(x)) / (count(x) - 1), 0.5) - // - // var_pop(x) ==> - // (sum(x * x) - sum(x) * sum(x) / count(x)) / count(x) - // - // var_samp(x) ==> - // (sum(x * x) - sum(x) * sum(x) / count(x)) / (count(x) - 1) + // stddev_pop(x) ==> + // power((sum(x * x) - sum(x) * sum(x) / count(x)) / count(x), 0.5) + // + // stddev_samp(x) ==> + // power((sum(x * x) - sum(x) * sum(x) / count(x)) / (count(x) - 1), 0.5) + // + // var_pop(x) ==> + // (sum(x * x) - sum(x) * sum(x) / count(x)) / count(x) + // + // var_samp(x) ==> + // (sum(x * x) - sum(x) * sum(x) / count(x)) / (count(x) - 1) case STDDEV_POP: return variance(context, field, partitions, rows, lowerBound, upperBound, true, true); case STDDEV_SAMP: diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java index 32aee242388..f619d966cc8 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java @@ -131,8 +131,8 @@ public static SqlTypeName convertRelDataTypeToSqlTypeName(RelDataType type) { case EXPR_DATE -> SqlTypeName.DATE; case EXPR_TIME -> SqlTypeName.TIME; case EXPR_TIMESTAMP -> SqlTypeName.TIMESTAMP; - // EXPR_IP is mapped to SqlTypeName.OTHER since there is no - // corresponding SqlTypeName in Calcite. + // EXPR_IP is mapped to SqlTypeName.OTHER since there is no + // corresponding SqlTypeName in Calcite. 
case EXPR_IP -> SqlTypeName.OTHER; case EXPR_BINARY -> SqlTypeName.VARBINARY; default -> type.getSqlTypeName(); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeSpanExpressionFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeSpanExpressionFactory.java index 62ccb807801..a205de2f4c9 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeSpanExpressionFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeSpanExpressionFactory.java @@ -34,14 +34,9 @@ public RexNode createTimeSpanExpression( TimeUnitRegistry.validateSubSecondSpan(config, intervalValue); return switch (config) { - case MICROSECONDS, - MILLISECONDS, - CENTISECONDS, - DECISECONDS, - SECONDS, - MINUTES, - HOURS -> standardHandler.createExpression( - fieldExpr, intervalValue, config, alignmentOffsetMillis, context); + case MICROSECONDS, MILLISECONDS, CENTISECONDS, DECISECONDS, SECONDS, MINUTES, HOURS -> + standardHandler.createExpression( + fieldExpr, intervalValue, config, alignmentOffsetMillis, context); case DAYS -> dayHandler.createExpression(fieldExpr, intervalValue, context); case MONTHS -> monthHandler.createExpression(fieldExpr, intervalValue, context); }; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/datetime/DateTimeConversionUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/datetime/DateTimeConversionUtils.java index ebc4f56c748..9181ca0eb76 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/datetime/DateTimeConversionUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/datetime/DateTimeConversionUtils.java @@ -30,16 +30,18 @@ public static ExprTimestampValue forceConvertToTimestampValue( ExprValue value, FunctionProperties properties) { return switch (value) { case ExprTimestampValue timestampValue -> timestampValue; - case ExprDateValue dateValue -> (ExprTimestampValue) - ExprValueUtils.timestampValue(dateValue.timestampValue()); - case ExprTimeValue timeValue -> (ExprTimestampValue) - ExprValueUtils.timestampValue(timeValue.timestampValue(properties)); - case ExprStringValue stringValue -> new ExprTimestampValue( - DateTimeParser.parse(stringValue.stringValue())); - default -> throw new ExpressionEvaluationException( - String.format( - "Cannot convert %s to timestamp, only STRING, DATE, TIME and TIMESTAMP are supported", - value.type())); + case ExprDateValue dateValue -> + (ExprTimestampValue) ExprValueUtils.timestampValue(dateValue.timestampValue()); + case ExprTimeValue timeValue -> + (ExprTimestampValue) ExprValueUtils.timestampValue(timeValue.timestampValue(properties)); + case ExprStringValue stringValue -> + new ExprTimestampValue(DateTimeParser.parse(stringValue.stringValue())); + default -> + throw new ExpressionEvaluationException( + String.format( + "Cannot convert %s to timestamp, only STRING, DATE, TIME and TIMESTAMP are" + + " supported", + value.type())); }; } @@ -129,8 +131,9 @@ public static TemporalAmount convertToTemporalAmount(long number, TimeUnit unit) case MICROSECOND -> Duration.ofNanos(number * 1000); case NANOSECOND -> Duration.ofNanos(number); - default -> throw new UnsupportedOperationException( - "No mapping defined for Calcite TimeUnit: " + unit); + default -> + throw new UnsupportedOperationException( + "No mapping defined for Calcite TimeUnit: " + unit); }; } } diff --git a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunctions.java 
b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunctions.java index f5d3c44b7e1..e0f626e54f1 100644 --- a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunctions.java @@ -2060,23 +2060,23 @@ private DateTimeFormatter getFormatter(int dateAsInt) { // Check below from YYYYMMDD - MMDD which format should be used switch (length) { - // Check if dateAsInt is at least 8 digits long + // Check if dateAsInt is at least 8 digits long case FULL_DATE_LENGTH: return DATE_FORMATTER_LONG_YEAR; - // Check if dateAsInt is at least 6 digits long + // Check if dateAsInt is at least 6 digits long case SHORT_DATE_LENGTH: return DATE_FORMATTER_SHORT_YEAR; - // Check if dateAsInt is at least 5 digits long + // Check if dateAsInt is at least 5 digits long case SINGLE_DIGIT_YEAR_DATE_LENGTH: return DATE_FORMATTER_SINGLE_DIGIT_YEAR; - // Check if dateAsInt is at least 4 digits long + // Check if dateAsInt is at least 4 digits long case NO_YEAR_DATE_LENGTH: return DATE_FORMATTER_NO_YEAR; - // Check if dateAsInt is at least 3 digits long + // Check if dateAsInt is at least 3 digits long case SINGLE_DIGIT_MONTH_DATE_LENGTH: return DATE_FORMATTER_SINGLE_DIGIT_MONTH; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java index bb58a38a109..521764ba7bb 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLTypeChecker.java @@ -600,25 +600,28 @@ private static List> getExprSignatures(FamilyOperandTypeChecker t private static List getExprTypes(SqlTypeFamily family) { List concreteTypes = switch (family) { - case DATETIME -> List.of( - OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP), - OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.DATE), - OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.TIME)); - case NUMERIC -> List.of( - OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), - OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE)); - // Integer is mapped to BIGINT in family.getDefaultConcreteType - case INTEGER -> List.of( - OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER)); - case ANY, IGNORE -> List.of( - OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.ANY)); - case DATETIME_INTERVAL -> SqlTypeName.INTERVAL_TYPES.stream() - .map( - type -> - OpenSearchTypeFactory.TYPE_FACTORY.createSqlIntervalType( - new SqlIntervalQualifier( - type.getStartUnit(), type.getEndUnit(), SqlParserPos.ZERO))) - .collect(Collectors.toList()); + case DATETIME -> + List.of( + OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP), + OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.DATE), + OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.TIME)); + case NUMERIC -> + List.of( + OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), + OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE)); + // Integer is mapped to BIGINT in family.getDefaultConcreteType + case INTEGER -> + List.of(OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER)); + case ANY, IGNORE -> + List.of(OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.ANY)); + case DATETIME_INTERVAL -> + SqlTypeName.INTERVAL_TYPES.stream() + 
.map( + type -> + OpenSearchTypeFactory.TYPE_FACTORY.createSqlIntervalType( + new SqlIntervalQualifier( + type.getStartUnit(), type.getEndUnit(), SqlParserPos.ZERO))) + .collect(Collectors.toList()); default -> { RelDataType type = family.getDefaultConcreteType(OpenSearchTypeFactory.TYPE_FACTORY); if (type == null) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/CryptographicFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/CryptographicFunction.java index bb228a1b0e2..0f46e5cbb1c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/CryptographicFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/CryptographicFunction.java @@ -50,15 +50,17 @@ public Expression implement( public static String getDigest(String input, int algorithm) { return switch (algorithm) { - case 224 -> Hex.encodeHexString( - DigestUtils.getDigest(MessageDigestAlgorithms.SHA_224).digest(input.getBytes())); + case 224 -> + Hex.encodeHexString( + DigestUtils.getDigest(MessageDigestAlgorithms.SHA_224).digest(input.getBytes())); case 256 -> DigestUtils.sha256Hex(input); case 384 -> DigestUtils.sha384Hex(input); case 512 -> DigestUtils.sha512Hex(input); - default -> throw new IllegalArgumentException( - String.format( - "Unsupported SHA2 algorithm: %d. Only 224, 256, 384, and 512 are supported.", - algorithm)); + default -> + throw new IllegalArgumentException( + String.format( + "Unsupported SHA2 algorithm: %d. Only 224, 256, 384, and 512 are supported.", + algorithm)); }; } } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java index ed32872c8e2..f28f12e30b9 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/SpanFunction.java @@ -88,11 +88,12 @@ public Expression implement( } if (SqlTypeUtil.isNull(unitType)) { return switch (call.getType().getSqlTypeName()) { - case BIGINT, INTEGER, SMALLINT, TINYINT -> Expressions.multiply( - Expressions.divide(field, interval), interval); - default -> Expressions.multiply( - Expressions.call(BuiltInMethod.FLOOR.method, Expressions.divide(field, interval)), - interval); + case BIGINT, INTEGER, SMALLINT, TINYINT -> + Expressions.multiply(Expressions.divide(field, interval), interval); + default -> + Expressions.multiply( + Expressions.call(BuiltInMethod.FLOOR.method, Expressions.divide(field, interval)), + interval); }; } else if (fieldType instanceof ExprSqlType exprSqlType) { // TODO: pass in constant arguments when constructing @@ -101,8 +102,9 @@ public Expression implement( case EXPR_DATE -> "evalDate"; case EXPR_TIME -> "evalTime"; case EXPR_TIMESTAMP -> "evalTimestamp"; - default -> throw new IllegalArgumentException( - String.format("Unsupported expr type: %s", exprSqlType.getExprType())); + default -> + throw new IllegalArgumentException( + String.format("Unsupported expr type: %s", exprSqlType.getExprType())); }; ScalarFunctionImpl function = (ScalarFunctionImpl) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java index 719f078ba38..c6ff1a64478 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java +++ 
b/core/src/main/java/org/opensearch/sql/expression/function/udf/condition/EnhancedCoalesceFunction.java @@ -63,18 +63,18 @@ private static ExprValue coerceToType(ExprValue value, String typeName) { return switch (typeName) { case "INTEGER" -> tryConvert(() -> ExprValueUtils.integerValue(value.integerValue()), value); case "BIGINT" -> tryConvert(() -> ExprValueUtils.longValue(value.longValue()), value); - case "SMALLINT", "TINYINT" -> tryConvert( - () -> ExprValueUtils.integerValue(value.integerValue()), value); + case "SMALLINT", "TINYINT" -> + tryConvert(() -> ExprValueUtils.integerValue(value.integerValue()), value); case "DOUBLE" -> tryConvert(() -> ExprValueUtils.doubleValue(value.doubleValue()), value); - case "FLOAT", "REAL" -> tryConvert( - () -> ExprValueUtils.floatValue(value.floatValue()), value); + case "FLOAT", "REAL" -> + tryConvert(() -> ExprValueUtils.floatValue(value.floatValue()), value); case "BOOLEAN" -> tryConvert(() -> ExprValueUtils.booleanValue(value.booleanValue()), value); - case "VARCHAR", "CHAR" -> tryConvert( - () -> ExprValueUtils.stringValue(String.valueOf(value.value())), value); + case "VARCHAR", "CHAR" -> + tryConvert(() -> ExprValueUtils.stringValue(String.valueOf(value.value())), value); case "DATE" -> tryConvert(() -> ExprValueUtils.dateValue(value.dateValue()), value); case "TIME" -> tryConvert(() -> ExprValueUtils.timeValue(value.timeValue()), value); - case "TIMESTAMP" -> tryConvert( - () -> ExprValueUtils.timestampValue(value.timestampValue()), value); + case "TIMESTAMP" -> + tryConvert(() -> ExprValueUtils.timestampValue(value.timestampValue()), value); case "DECIMAL" -> tryConvert(() -> ExprValueUtils.doubleValue(value.doubleValue()), value); default -> value; }; diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java index 96d59a0a704..ce200323f60 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CompareIpFunction.java @@ -99,9 +99,10 @@ public boolean isDeterministic() { case GREATER_THAN_OR_EQUAL -> PPLBuiltinOperators.LTE_IP; case EQUALS -> PPLBuiltinOperators.EQUALS_IP; case NOT_EQUALS -> PPLBuiltinOperators.NOT_EQUALS_IP; - default -> throw new IllegalArgumentException( - String.format( - Locale.ROOT, "CompareIpFunction is not supposed to be of kind: %s", kind)); + default -> + throw new IllegalArgumentException( + String.format( + Locale.ROOT, "CompareIpFunction is not supposed to be of kind: %s", kind)); }; } @@ -151,8 +152,9 @@ private static Expression evalCompareResult(Expression compareResult, SqlKind co case LESS_THAN_OR_EQUAL -> Expressions.lessThanOrEqual(compareResult, zero); case GREATER_THAN -> Expressions.greaterThan(compareResult, zero); case GREATER_THAN_OR_EQUAL -> Expressions.greaterThanOrEqual(compareResult, zero); - default -> throw new UnsupportedOperationException( - String.format(Locale.ROOT, "Unsupported compare type: %s", compareType)); + default -> + throw new UnsupportedOperationException( + String.format(Locale.ROOT, "Unsupported compare type: %s", compareType)); }; } diff --git a/core/src/test/java/org/opensearch/sql/expression/datetime/NowLikeFunctionTest.java b/core/src/test/java/org/opensearch/sql/expression/datetime/NowLikeFunctionTest.java index ab97f3f4df1..8c5661021f5 100644 --- 
+++ b/core/src/test/java/org/opensearch/sql/expression/datetime/NowLikeFunctionTest.java
@@ -251,7 +251,7 @@ private Temporal extractValue(FunctionExpression func) {
         return LocalDateTime.ofInstant(func.valueOf().timestampValue(), ZoneOffset.UTC);
       case TIME:
         return func.valueOf().timeValue();
-        // unreachable code
+      // unreachable code
       default:
         throw new IllegalArgumentException(String.format("%s", func.type()));
     }
diff --git a/datasources/build.gradle b/datasources/build.gradle
index cdea790bf97..1dd01d82fb9 100644
--- a/datasources/build.gradle
+++ b/datasources/build.gradle
@@ -35,6 +35,7 @@ dependencies {
   testImplementation group: 'org.mockito', name: 'mockito-core', version: "${mockito_version}"
   testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: "${mockito_version}"
   testImplementation 'org.junit.jupiter:junit-jupiter:5.9.3'
+  testRuntimeOnly('org.junit.platform:junit-platform-launcher')
 }
 
 test {
diff --git a/direct-query-core/build.gradle b/direct-query-core/build.gradle
index 25b0f7869c1..7caffd11ffa 100644
--- a/direct-query-core/build.gradle
+++ b/direct-query-core/build.gradle
@@ -41,6 +41,7 @@ dependencies {
     exclude group: 'org.hamcrest', module: 'hamcrest-core'
   }
   testImplementation("org.opensearch.test:framework:${opensearch_version}")
+  testRuntimeOnly('org.junit.platform:junit-platform-launcher')
 }
 
 test {
diff --git a/direct-query/build.gradle b/direct-query/build.gradle
index e2b70df77b3..2a1dc04f6a1 100644
--- a/direct-query/build.gradle
+++ b/direct-query/build.gradle
@@ -43,6 +43,7 @@ dependencies {
     exclude group: 'org.hamcrest', module: 'hamcrest-core'
   }
   testImplementation("org.opensearch.test:framework:${opensearch_version}")
+  testRuntimeOnly('org.junit.platform:junit-platform-launcher')
 }
 
 test {
diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar
index a4b76b9530d66f5e68d973ea569d8e19de379189..1b33c55baabb587c669f562ae36f953de2481846 100644
GIT binary patch
delta 34943
[base85-encoded binary payload omitted: gradle-wrapper.jar regenerated as part of the Gradle 9.2.0 wrapper upgrade, see gradle-wrapper.properties below]
z;?EP^qy0SZdX`9sh!jt2^KgHyRrl?I`X8rO z8NK~qffuwrcv^i<^-sN;(~rF>En&Wk(?xUpXJ1i$BT!_#xy7-)Kt@ezB>Cmr;5qh^mji@urT}VzT*Om+_r%F`x$OqeakZ|EVfr%`L5IZXlLN1Lx$X$ z+~*?=bbBH!DkWE20Z&N_tCU_B5$>9N<-1b_)B4t9h0o5Fdg(TV#T=ZS;k;e9y5Pt( zcf%BKR`r}pq4b=}Y5!VT0!2?uu5S_u400^GsdDb9m9+E0!adTPK5T5=_*&)oy9xJV zF2%9jIC6B{IhfKk_L`{##PdAGvbj`=i^IWZR_QpWl7Pcg=0JJdXRWYv_wxuM9&rzRW2JGR-w|x_nY#<=SNhGv@xPUGak-)N>My zOneaxybJRv4`{BQkx7I>1a{^b!-nmXAIx>-%-v{b>i|3i&3>}pJSUmS2~`n_z^+yS z5F0W84=jO$-F%Y+=gUmi<5!s6KVLxR@N}V>dBECiGq5qIhN93#0IX18zN$3hPIm?d zV-!XFlLO}a%OLKmW?-;Ek-sboG(;JA1H1~@Hsm`!ZBY~!NrDxAkW>XLMBK-SZsJh| zutEn#h>3_B?HCwPO>9vHDV(GNHjo8$f7;~2gO;L~=q~SL-0fWZ~#j)X&6Bqf(AYY$jk0PJ03wGnXMds4rYbk)o%O?X5s6!3k zfXNPvon#Tm&!fx7m@-U0Xlej*iY)lxbYN7j0b(5#t3F$TR4GoDU7{+BI87QonpRme zOct=Q1)0SHI@Eabh9zRm!uB9RsmW9A4Z;2eABzjLU@_3Yb|{tzO}1YeB?~&EwGSvS z2b9-Gk@s+Bn7q;166{pOsgw*1jwq^ZTtTWtCL1hsmqk9p&jdx)T@RQl&dDjBieNJl zr|tj``9o2y>jP8GF7ag{X4W>)a%KhoKvyva1`M9A)97C%`B`O-U1bAu471WI(n_BRXdc33Qc~vQcM(m z%*7)yFC}Mk;$lTsaNBmW!75Q^;mHs)A-y`Vxw6QmkOqpmsncMpwYY?M85qRpg322J DDw4oP diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index f373f37ad82..b11741a1ada 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=efe9a3d147d948d7528a9887fa35abcf24ca1a43ad06439996490f77569b02d1 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-all.zip +distributionSha256Sum=16f2b95838c1ddcf7242b1c39e7bbbb43c842f1f1a1a0dc4959b6d4d68abcac3 +distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.0-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/gradlew b/gradlew index f5feea6d6b1..23d15a93670 100755 --- a/gradlew +++ b/gradlew @@ -86,8 +86,7 @@ done # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s -' "$PWD" ) || exit +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum @@ -115,7 +114,7 @@ case "$( uname )" in #( NONSTOP* ) nonstop=true ;; esac -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar +CLASSPATH="\\\"\\\"" # Determine the Java command to use to start the JVM. @@ -206,7 +205,7 @@ fi DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Collect all arguments for the java command: -# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, # and any embedded shellness will be escaped. # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be # treated as '${Hostname}' itself on the command line. @@ -214,7 +213,7 @@ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ + -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ "$@" # Stop when "xargs" is not available. 
diff --git a/gradlew.bat b/gradlew.bat index 9b42019c791..5eed7ee8452 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -70,11 +70,11 @@ goto fail :execute @rem Setup the command line -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar +set CLASSPATH= @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %* :end @rem End local scope for the variables with windows NT shell diff --git a/integ-test/build.gradle b/integ-test/build.gradle index 6df97e244a8..ea098d51b5a 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -749,7 +749,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java index 25b1368fcdd..4664491b686 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java @@ -113,24 +113,24 @@ private void createRole(String roleName, String indexPattern) throws IOException String.format( Locale.ROOT, """ - { - "cluster_permissions": [ - "cluster:admin/opensearch/ppl" - ], - "index_permissions": [{ - "index_patterns": [ - "%s" - ], - "allowed_actions": [ - "indices:data/read/search*", - "indices:admin/mappings/get", - "indices:monitor/settings/get", - "indices:data/read/point_in_time/create", - "indices:data/read/point_in_time/delete" - ] - }] - } - """, + { + "cluster_permissions": [ + "cluster:admin/opensearch/ppl" + ], + "index_permissions": [{ + "index_patterns": [ + "%s" + ], + "allowed_actions": [ + "indices:data/read/search*", + "indices:admin/mappings/get", + "indices:monitor/settings/get", + "indices:data/read/point_in_time/create", + "indices:data/read/point_in_time/delete" + ] + }] + } + """, indexPattern)); RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); @@ -151,12 +151,12 @@ private void createUser(String username, String roleName) throws IOException { String.format( Locale.ROOT, """ - { - "password": "%s", - "backend_roles": [], - "attributes": {} - } - """, + { + "password": "%s", + "backend_roles": [], + "attributes": {} + } + """, STRONG_PASSWORD)); RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); @@ -175,12 +175,12 @@ private void createUser(String username, String roleName) throws IOException { String.format( Locale.ROOT, """ - { - "backend_roles": [], - "hosts": [], - "users": ["%s"] - } - """, + { + "backend_roles": [], + "hosts": [], + "users": ["%s"] + } + """, username)); mappingRequest.setOptions(restOptionsBuilder); @@ -270,14 +270,14 @@ private void createRoleWithSpecificPermissions( String.format( Locale.ROOT, """ - { - "cluster_permissions": [%s], - "index_permissions": [{ - "index_patterns": ["%s"], - "allowed_actions": [%s] - }] - } - """, + { + "cluster_permissions": [%s], + "index_permissions": [{ + "index_patterns": ["%s"], + "allowed_actions": [%s] + }] + } 
+ """, clusterPermsJson, indexPattern, indexPermsJson)); diff --git a/legacy/build.gradle b/legacy/build.gradle index 7e78d5e6e13..fd6d7c8f65c 100644 --- a/legacy/build.gradle +++ b/legacy/build.gradle @@ -68,7 +68,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java b/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java index a6a91995333..7c2e69d7829 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java @@ -159,8 +159,8 @@ public Tuple function( break; } - // Split is currently not supported since its using .split() in painless which is not - // allow-listed + // Split is currently not supported since its using .split() in painless which is not + // allow-listed case "split": if (paramers.size() == 3) { functionStr = diff --git a/opensearch/build.gradle b/opensearch/build.gradle index 27aa81b0b67..5ca10c7091c 100644 --- a/opensearch/build.gradle +++ b/opensearch/build.gradle @@ -26,7 +26,7 @@ plugins { id 'java-library' id "io.freefair.lombok" id 'jacoco' - id 'info.solidsoft.pitest' version '1.9.0' + id 'info.solidsoft.pitest' version '1.19.0-rc.2' id 'com.diffplug.spotless' } @@ -47,8 +47,8 @@ dependencies { testImplementation('org.junit.jupiter:junit-jupiter-api:5.9.3') testImplementation('org.junit.jupiter:junit-jupiter-params:5.9.3') - testRuntimeOnly('org.junit.jupiter:junit-jupiter-engine:5.9.3') - testRuntimeOnly('org.junit.platform:junit-platform-launcher:1.9.3') + testRuntimeOnly('org.junit.jupiter:junit-jupiter-engine') + testRuntimeOnly('org.junit.platform:junit-platform-launcher') testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: "${hamcrest_version}" testImplementation group: 'org.mockito', name: 'mockito-core', version: "${mockito_version}" @@ -72,13 +72,13 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } pitest { targetClasses = ['org.opensearch.sql.*'] - pitestVersion = '1.9.0' + pitestVersion = '1.19.0-rc.2' threads = 4 outputFormats = ['HTML', 'XML'] timestampedReports = false diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java index 2548c67cffe..837a2a062ef 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java @@ -144,7 +144,7 @@ public static OpenSearchDataType of(MappingType mappingType, Map instances.getOrDefault(mappingType.toString(), new OpenSearchDataType(mappingType)); switch (mappingType) { case Object: - // TODO: use Object type once it has been added + // TODO: use Object type once it has been added case Nested: if (innerMap.isEmpty()) { return res; diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index b4bf48bd880..2abfb5a401b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -366,13 +366,15 @@ private static Pair createDistinctAggregation( AggregateBuilderHelper helper) { return switch (aggCall.getAggregation().kind) { - case COUNT -> Pair.of( - helper.build( - !args.isEmpty() ? args.getFirst() : null, - AggregationBuilders.cardinality(aggFieldName)), - new SingleValueParser(aggFieldName)); - default -> throw new AggregateAnalyzer.AggregateAnalyzerException( - String.format("unsupported distinct aggregator %s", aggCall.getAggregation())); + case COUNT -> + Pair.of( + helper.build( + !args.isEmpty() ? args.getFirst() : null, + AggregationBuilders.cardinality(aggFieldName)), + new SingleValueParser(aggFieldName)); + default -> + throw new AggregateAnalyzer.AggregateAnalyzerException( + String.format("unsupported distinct aggregator %s", aggCall.getAggregation())); }; } @@ -383,18 +385,22 @@ private static Pair createRegularAggregation( AggregateBuilderHelper helper) { return switch (aggCall.getAggregation().kind) { - case AVG -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.avg(aggFieldName)), - new SingleValueParser(aggFieldName)); - // 1. Only case SUM, skip SUM0 / COUNT since calling avg() in DSL should be faster. - // 2. To align with databases, SUM0 is not preferred now. - case SUM -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.sum(aggFieldName)), - new SingleValueParser(aggFieldName)); - case COUNT -> Pair.of( - helper.build( - !args.isEmpty() ? args.getFirst() : null, AggregationBuilders.count(aggFieldName)), - new SingleValueParser(aggFieldName)); + case AVG -> + Pair.of( + helper.build(args.getFirst(), AggregationBuilders.avg(aggFieldName)), + new SingleValueParser(aggFieldName)); + // 1. Only case SUM, skip SUM0 / COUNT since calling avg() in DSL should be faster. + // 2. To align with databases, SUM0 is not preferred now. + case SUM -> + Pair.of( + helper.build(args.getFirst(), AggregationBuilders.sum(aggFieldName)), + new SingleValueParser(aggFieldName)); + case COUNT -> + Pair.of( + helper.build( + !args.isEmpty() ? 
args.getFirst() : null, + AggregationBuilders.count(aggFieldName)), + new SingleValueParser(aggFieldName)); case MIN -> { ExprType fieldType = OpenSearchTypeFactory.convertRelDataTypeToExprType(args.getFirst().getType()); @@ -433,46 +439,54 @@ private static Pair createRegularAggregation( new TopHitsParser(aggFieldName, true)); } } - case VAR_SAMP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), - new StatsParser(ExtendedStats::getVarianceSampling, aggFieldName)); - case VAR_POP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), - new StatsParser(ExtendedStats::getVariancePopulation, aggFieldName)); - case STDDEV_SAMP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), - new StatsParser(ExtendedStats::getStdDeviationSampling, aggFieldName)); - case STDDEV_POP -> Pair.of( - helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), - new StatsParser(ExtendedStats::getStdDeviationPopulation, aggFieldName)); - case ARG_MAX -> Pair.of( - AggregationBuilders.topHits(aggFieldName) - .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) - .size(1) - .from(0) - .sort( - helper.inferNamedField(args.get(1)).getReferenceForTermQuery(), - org.opensearch.search.sort.SortOrder.DESC), - new ArgMaxMinParser(aggFieldName)); - case ARG_MIN -> Pair.of( - AggregationBuilders.topHits(aggFieldName) - .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) - .size(1) - .from(0) - .sort( - helper.inferNamedField(args.get(1)).getReferenceForTermQuery(), - org.opensearch.search.sort.SortOrder.ASC), - new ArgMaxMinParser(aggFieldName)); + case VAR_SAMP -> + Pair.of( + helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + new StatsParser(ExtendedStats::getVarianceSampling, aggFieldName)); + case VAR_POP -> + Pair.of( + helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + new StatsParser(ExtendedStats::getVariancePopulation, aggFieldName)); + case STDDEV_SAMP -> + Pair.of( + helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + new StatsParser(ExtendedStats::getStdDeviationSampling, aggFieldName)); + case STDDEV_POP -> + Pair.of( + helper.build(args.getFirst(), AggregationBuilders.extendedStats(aggFieldName)), + new StatsParser(ExtendedStats::getStdDeviationPopulation, aggFieldName)); + case ARG_MAX -> + Pair.of( + AggregationBuilders.topHits(aggFieldName) + .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) + .size(1) + .from(0) + .sort( + helper.inferNamedField(args.get(1)).getReferenceForTermQuery(), + org.opensearch.search.sort.SortOrder.DESC), + new ArgMaxMinParser(aggFieldName)); + case ARG_MIN -> + Pair.of( + AggregationBuilders.topHits(aggFieldName) + .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) + .size(1) + .from(0) + .sort( + helper.inferNamedField(args.get(1)).getReferenceForTermQuery(), + org.opensearch.search.sort.SortOrder.ASC), + new ArgMaxMinParser(aggFieldName)); case OTHER_FUNCTION -> { BuiltinFunctionName functionName = BuiltinFunctionName.ofAggregation(aggCall.getAggregation().getName()).get(); yield switch (functionName) { - case TAKE -> Pair.of( - AggregationBuilders.topHits(aggFieldName) - .fetchField(helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) - .size(helper.inferValue(args.getLast(), Integer.class)) - .from(0), - new 
TopHitsParser(aggFieldName)); + case TAKE -> + Pair.of( + AggregationBuilders.topHits(aggFieldName) + .fetchField( + helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) + .size(helper.inferValue(args.getLast(), Integer.class)) + .from(0), + new TopHitsParser(aggFieldName)); case FIRST -> { TopHitsAggregationBuilder firstBuilder = AggregationBuilders.topHits(aggFieldName).size(1).from(0); @@ -505,17 +519,20 @@ yield switch (functionName) { } yield Pair.of(aggBuilder, new SinglePercentileParser(aggFieldName)); } - case DISTINCT_COUNT_APPROX -> Pair.of( - helper.build( - !args.isEmpty() ? args.getFirst() : null, - AggregationBuilders.cardinality(aggFieldName)), - new SingleValueParser(aggFieldName)); - default -> throw new AggregateAnalyzer.AggregateAnalyzerException( - String.format("Unsupported push-down aggregator %s", aggCall.getAggregation())); + case DISTINCT_COUNT_APPROX -> + Pair.of( + helper.build( + !args.isEmpty() ? args.getFirst() : null, + AggregationBuilders.cardinality(aggFieldName)), + new SingleValueParser(aggFieldName)); + default -> + throw new AggregateAnalyzer.AggregateAnalyzerException( + String.format("Unsupported push-down aggregator %s", aggCall.getAggregation())); }; } - default -> throw new AggregateAnalyzer.AggregateAnalyzerException( - String.format("unsupported aggregator %s", aggCall.getAggregation())); + default -> + throw new AggregateAnalyzer.AggregateAnalyzerException( + String.format("unsupported aggregator %s", aggCall.getAggregation())); }; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java index 104ab04e547..5ec21888a89 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -174,8 +174,9 @@ private void analyzeSimpleComparison(RexCall call, String key) { case LESS_THAN -> { addTo(key, value); } - default -> throw new UnsupportedOperationException( - "ranges must be equivalents of field >= constant or field < constant"); + default -> + throw new UnsupportedOperationException( + "ranges must be equivalents of field >= constant or field < constant"); } ; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 7abb0c1b937..c74356ce977 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -374,7 +374,7 @@ public Expression visitCall(RexCall call) { || MULTI_FIELDS_RELEVANCE_FUNCTION_SET.contains(functionName)) { return visitRelevanceFunc(call); } - // fall through + // fall through default: String message = format(Locale.ROOT, "Unsupported syntax [%s] for call: [%s]", syntax, call); @@ -657,14 +657,16 @@ private QueryExpression binary(RexCall call) { RexUnknownAs nullAs = getNullAsForSearch(call); QueryExpression finalExpression = switch (nullAs) { - // e.g. where isNotNull(a) and (a = 1 or a = 2) - // TODO: For this case, seems return `expression` should be equivalent - case FALSE -> CompoundQueryExpression.and( - false, expression, QueryExpression.create(pair.getKey()).exists()); - // e.g. 
where isNull(a) or a = 1 or a = 2 - case TRUE -> CompoundQueryExpression.or( - expression, QueryExpression.create(pair.getKey()).notExists()); - // e.g. where a = 1 or a = 2 + // e.g. where isNotNull(a) and (a = 1 or a = 2) + // TODO: For this case, seems return `expression` should be equivalent + case FALSE -> + CompoundQueryExpression.and( + false, expression, QueryExpression.create(pair.getKey()).exists()); + // e.g. where isNull(a) or a = 1 or a = 2 + case TRUE -> + CompoundQueryExpression.or( + expression, QueryExpression.create(pair.getKey()).notExists()); + // e.g. where a = 1 or a = 2 case UNKNOWN -> expression; }; finalExpression.updateAnalyzedNodes(call); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 88f15607a7a..11421fca0a1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -122,13 +122,15 @@ public double estimateRowCount(RelMetadataQuery mq) { switch (operation.type()) { case AGGREGATION -> mq.getRowCount((RelNode) operation.digest()); case PROJECT, SORT, SORT_EXPR -> rowCount; - case SORT_AGG_METRICS -> NumberUtil.min( - rowCount, osIndex.getBucketSize().doubleValue()); - // Refer the org.apache.calcite.rel.metadata.RelMdRowCount + case SORT_AGG_METRICS -> + NumberUtil.min(rowCount, osIndex.getBucketSize().doubleValue()); + // Refer the org.apache.calcite.rel.metadata.RelMdRowCount case COLLAPSE -> rowCount / 10; - case FILTER, SCRIPT -> NumberUtil.multiply( - rowCount, - RelMdUtil.guessSelectivity(((FilterDigest) operation.digest()).condition())); + case FILTER, SCRIPT -> + NumberUtil.multiply( + rowCount, + RelMdUtil.guessSelectivity( + ((FilterDigest) operation.digest()).condition())); case LIMIT -> Math.min(rowCount, ((LimitDigest) operation.digest()).limit()); case RARE_TOP -> { /** similar to {@link Aggregate#estimateRowCount(RelMetadataQuery)} */ @@ -166,7 +168,7 @@ public double estimateRowCount(RelMetadataQuery mq) { dRows = mq.getRowCount((RelNode) operation.digest()); dCpu += dRows * getAggMultiplier(operation); } - // Ignored Project in cost accumulation, but it will affect the external cost + // Ignored Project in cost accumulation, but it will affect the external cost case PROJECT -> {} case SORT -> dCpu += dRows; case SORT_AGG_METRICS -> { @@ -180,15 +182,17 @@ public double estimateRowCount(RelMetadataQuery mq) { sortKeys.stream().filter(digest -> digest.getExpression() != null).count(); dCpu += NumberUtil.multiply(dRows, 1.1 * complexExprCount); } - // Refer the org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(Aggregate rel,...) + // Refer the org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(Aggregate rel,...) case COLLAPSE -> { dRows = dRows / 10; dCpu += dRows; } - // Ignore cost the primitive filter but it will affect the rows count. - case FILTER -> dRows = - NumberUtil.multiply( - dRows, RelMdUtil.guessSelectivity(((FilterDigest) operation.digest()).condition())); + // Ignore cost the primitive filter but it will affect the rows count. 
+ case FILTER -> + dRows = + NumberUtil.multiply( + dRows, + RelMdUtil.guessSelectivity(((FilterDigest) operation.digest()).condition())); case SCRIPT -> { FilterDigest filterDigest = (FilterDigest) operation.digest(); dRows = NumberUtil.multiply(dRows, RelMdUtil.guessSelectivity(filterDigest.condition())); @@ -196,10 +200,10 @@ public double estimateRowCount(RelMetadataQuery mq) { // the factor amplified by script count. dCpu += NumberUtil.multiply(dRows, Math.pow(1.1, filterDigest.scriptCount())); } - // Ignore cost the LIMIT but it will affect the rows count. - // Try to reduce the rows count by 1 to make the cost cheaper slightly than non-push down. - // Because we'd like to push down LIMIT even when the fetch in LIMIT is greater than - // dRows. + // Ignore cost the LIMIT but it will affect the rows count. + // Try to reduce the rows count by 1 to make the cost cheaper slightly than non-push down. + // Because we'd like to push down LIMIT even when the fetch in LIMIT is greater than + // dRows. case LIMIT -> dRows = Math.min(dRows, ((LimitDigest) operation.digest()).limit()) - 1; case RARE_TOP -> { /** similar to {@link Aggregate#computeSelfCost(RelOptPlanner, RelMetadataQuery)} */ diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java index 83ec6093718..10334bc1b26 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/CalciteAggregationScript.java @@ -61,12 +61,11 @@ public Object execute() { // See logic in {@link ExpressionAggregationScript::execute} return switch ((ExprCoreType) exprType) { case TIME -> - // Can't get timestamp from `ExprTimeValue` - MILLIS.between(LocalTime.MIN, ExprValueUtils.fromObjectValue(value, TIME).timeValue()); + // Can't get timestamp from `ExprTimeValue` + MILLIS.between(LocalTime.MIN, ExprValueUtils.fromObjectValue(value, TIME).timeValue()); case DATE -> ExprValueUtils.fromObjectValue(value, DATE).timestampValue().toEpochMilli(); - case TIMESTAMP -> ExprValueUtils.fromObjectValue(value, TIMESTAMP) - .timestampValue() - .toEpochMilli(); + case TIMESTAMP -> + ExprValueUtils.fromObjectValue(value, TIMESTAMP).timestampValue().toEpochMilli(); default -> value; }; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java index 83e8b8b74b1..dab778923b6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/OpenSearchRelOptUtil.java @@ -97,7 +97,7 @@ public static Optional> getOrderEquivalentInputInfo(RexNo return getOrderEquivalentInputInfo(variable) .map(inputInfo -> Pair.of(inputInfo.getLeft(), flipped != inputInfo.getRight())); } - // Ignore DIVIDE operator for now because it has too many precision issues + // Ignore DIVIDE operator for now because it has too many precision issues case CAST, SAFE_CAST: { RexNode child = ((RexCall) expr).getOperands().get(0); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java index 505db011f7b..6205cae42ab 
100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java @@ -113,7 +113,8 @@ void testAnalyzeSimpleCaseExpression() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -159,7 +160,8 @@ void testAnalyzeLessThanComparison() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -216,7 +218,8 @@ void testAnalyzeWithSearchCondition() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -265,7 +268,8 @@ void testAnalyzeWithNullElse() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -444,7 +448,8 @@ void testAnalyzeWithReversedComparison() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -514,7 +519,8 @@ void testSimpleCaseGeneratesExpectedDSL() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -576,7 +582,8 @@ void testMultipleConditionsGenerateExpectedDSL() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -622,7 +629,8 @@ void testLessThanConditionGeneratesExpectedDSL() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -670,7 +678,8 @@ void testNullElseClauseGeneratesExpectedDSL() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -727,7 +736,8 @@ void testSearchConditionGeneratesExpectedDSL() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } @@ -798,7 +808,8 @@ void testSearchWithDiscontinuousRanges() { "keyed" : true } } - }"""; + }\ + """; assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 9d7a6b93f92..9486f5fe0b8 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -93,14 +93,15 @@ void equals_generatesTermQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(TermQueryBuilder.class, result); assertEquals( """ - { - "term" : { - "a" : { - "value" : 12, - "boost" : 1.0 - } - } - }""", + { + "term" : { + "a" : { + "value" : 12, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -112,30 +113,31 @@ void notEquals_generatesBoolQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(BoolQueryBuilder.class, result); assertEquals( """ - { - "bool" : { - "must" : [ - { - "exists" : { - "field" : "a", - "boost" : 1.0 - } - } - ], - "must_not" : [ - { - "term" : { - "a" : { - "value" : 12, - "boost" : 1.0 - } - } + { + "bool" : { + "must" : [ + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "must_not" : [ + { + "term" : { + "a" : { + "value" : 12, + "boost" : 1.0 } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 + } } 
- }""", + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -147,17 +149,18 @@ void gt_generatesRangeQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(RangeQueryBuilder.class, result); assertEquals( """ - { - "range" : { - "a" : { - "from" : 12, - "to" : null, - "include_lower" : false, - "include_upper" : true, - "boost" : 1.0 - } - } - }""", + { + "range" : { + "a" : { + "from" : 12, + "to" : null, + "include_lower" : false, + "include_upper" : true, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -170,17 +173,18 @@ void gte_generatesRangeQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(RangeQueryBuilder.class, result); assertEquals( """ - { - "range" : { - "a" : { - "from" : 12, - "to" : null, - "include_lower" : true, - "include_upper" : true, - "boost" : 1.0 - } - } - }""", + { + "range" : { + "a" : { + "from" : 12, + "to" : null, + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -192,17 +196,18 @@ void lt_generatesRangeQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(RangeQueryBuilder.class, result); assertEquals( """ - { - "range" : { - "a" : { - "from" : null, - "to" : 12, - "include_lower" : true, - "include_upper" : false, - "boost" : 1.0 - } - } - }""", + { + "range" : { + "a" : { + "from" : null, + "to" : 12, + "include_lower" : true, + "include_upper" : false, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -214,17 +219,18 @@ void lte_generatesRangeQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(RangeQueryBuilder.class, result); assertEquals( """ - { - "range" : { - "a" : { - "from" : null, - "to" : 12, - "include_lower" : true, - "include_upper" : true, - "boost" : 1.0 - } - } - }""", + { + "range" : { + "a" : { + "from" : null, + "to" : 12, + "include_lower" : true, + "include_upper" : true, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -236,12 +242,13 @@ void exists_generatesExistsQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(ExistsQueryBuilder.class, result); assertEquals( """ - { - "exists" : { - "field" : "a", - "boost" : 1.0 - } - }""", + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -266,7 +273,8 @@ void notExists_generatesMustNotExistsQuery() throws ExpressionNotAnalyzableExcep "adjust_pure_negative" : true, "boost" : 1.0 } - }""", + }\ + """, result.toString()); } @@ -281,16 +289,17 @@ void search_generatesTermsQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(TermsQueryBuilder.class, result); assertEquals( """ - { - "terms" : { - "a" : [ - 12.0, - 13.0, - 14.0 - ], - "boost" : 1.0 - } - }""", + { + "terms" : { + "a" : [ + 12.0, + 13.0, + 14.0 + ], + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -302,21 +311,22 @@ void contains_generatesMatchQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(MatchQueryBuilder.class, result); assertEquals( """ - { - "match" : { - "b" : { - "query" : "Hi", - "operator" : "OR", - "prefix_length" : 0, - "max_expansions" : 50, - "fuzzy_transpositions" : true, - "lenient" : false, - "zero_terms_query" : "NONE", - "auto_generate_synonyms_phrase_query" : true, - "boost" : 1.0 - } - } - }""", + { + "match" : { + "b" : { + "query" : "Hi", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + 
"auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -329,21 +339,22 @@ void matchRelevanceQueryFunction_generatesMatchQuery() throws ExpressionNotAnaly assertInstanceOf(MatchQueryBuilder.class, result); assertEquals( """ - { - "match" : { - "b" : { - "query" : "Hi", - "operator" : "OR", - "prefix_length" : 0, - "max_expansions" : 50, - "fuzzy_transpositions" : true, - "lenient" : false, - "zero_terms_query" : "NONE", - "auto_generate_synonyms_phrase_query" : true, - "boost" : 1.0 - } - } - }""", + { + "match" : { + "b" : { + "query" : "Hi", + "operator" : "OR", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "lenient" : false, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -365,16 +376,17 @@ void matchPhraseRelevanceQueryFunction_generatesMatchPhraseQuery() assertInstanceOf(MatchPhraseQueryBuilder.class, result); assertEquals( """ - { - "match_phrase" : { - "b" : { - "query" : "Hi", - "slop" : 2, - "zero_terms_query" : "NONE", - "boost" : 1.0 - } - } - }""", + { + "match_phrase" : { + "b" : { + "query" : "Hi", + "slop" : 2, + "zero_terms_query" : "NONE", + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -396,19 +408,20 @@ void matchBoolPrefixRelevanceQueryFunction_generatesMatchBoolPrefixQuery() assertInstanceOf(MatchBoolPrefixQueryBuilder.class, result); assertEquals( """ - { - "match_bool_prefix" : { - "b" : { - "query" : "Hi", - "operator" : "OR", - "minimum_should_match" : "1", - "prefix_length" : 0, - "max_expansions" : 50, - "fuzzy_transpositions" : true, - "boost" : 1.0 - } - } - }""", + { + "match_bool_prefix" : { + "b" : { + "query" : "Hi", + "operator" : "OR", + "minimum_should_match" : "1", + "prefix_length" : 0, + "max_expansions" : 50, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -430,18 +443,19 @@ void matchPhrasePrefixRelevanceQueryFunction_generatesMatchPhrasePrefixQuery() assertInstanceOf(MatchPhrasePrefixQueryBuilder.class, result); assertEquals( """ - { - "match_phrase_prefix" : { - "b" : { - "query" : "Hi", - "analyzer" : "standard", - "slop" : 0, - "max_expansions" : 50, - "zero_terms_query" : "NONE", - "boost" : 1.0 - } - } - }""", + { + "match_phrase_prefix" : { + "b" : { + "query" : "Hi", + "analyzer" : "standard", + "slop" : 0, + "max_expansions" : 50, + "zero_terms_query" : "NONE", + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -473,27 +487,28 @@ void queryStringRelevanceQueryFunction_generatesQueryStringQuery() assertInstanceOf(QueryStringQueryBuilder.class, result); assertEquals( """ - { - "query_string" : { - "query" : "Hi", - "fields" : [ - "b^1.0", - "c^2.5" - ], - "type" : "best_fields", - "default_operator" : "or", - "max_determinized_states" : 10000, - "enable_position_increments" : true, - "fuzziness" : "1", - "fuzzy_prefix_length" : 0, - "fuzzy_max_expansions" : 50, - "phrase_slop" : 0, - "escape" : false, - "auto_generate_synonyms_phrase_query" : true, - "fuzzy_transpositions" : true, - "boost" : 1.0 - } - }""", + { + "query_string" : { + "query" : "Hi", + "fields" : [ + "b^1.0", + "c^2.5" + ], + "type" : "best_fields", + "default_operator" : "or", + "max_determinized_states" : 10000, + "enable_position_increments" : true, + "fuzziness" : "1", + "fuzzy_prefix_length" : 0, + "fuzzy_max_expansions" : 50, + "phrase_slop" : 0, + "escape" : false, + "auto_generate_synonyms_phrase_query" : true, + 
"fuzzy_transpositions" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -518,22 +533,23 @@ void simpleQueryStringRelevanceQueryFunction_generatesSimpleQueryStringQuery() assertInstanceOf(SimpleQueryStringBuilder.class, result); assertEquals( """ - { - "simple_query_string" : { - "query" : "Hi", - "fields" : [ - "b*^1.0" - ], - "flags" : -1, - "default_operator" : "or", - "analyze_wildcard" : false, - "auto_generate_synonyms_phrase_query" : true, - "fuzzy_prefix_length" : 0, - "fuzzy_max_expansions" : 50, - "fuzzy_transpositions" : true, - "boost" : 1.0 - } - }""", + { + "simple_query_string" : { + "query" : "Hi", + "fields" : [ + "b*^1.0" + ], + "flags" : -1, + "default_operator" : "or", + "analyze_wildcard" : false, + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_prefix_length" : 0, + "fuzzy_max_expansions" : 50, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -561,23 +577,24 @@ void multiMatchRelevanceQueryFunction_generatesMultiMatchQuery() assertInstanceOf(MultiMatchQueryBuilder.class, result); assertEquals( """ - { - "multi_match" : { - "query" : "Hi", - "fields" : [ - "b*^1.0" - ], - "type" : "best_fields", - "operator" : "OR", - "slop" : 0, - "prefix_length" : 0, - "max_expansions" : 25, - "zero_terms_query" : "NONE", - "auto_generate_synonyms_phrase_query" : true, - "fuzzy_transpositions" : true, - "boost" : 1.0 - } - }""", + { + "multi_match" : { + "query" : "Hi", + "fields" : [ + "b*^1.0" + ], + "type" : "best_fields", + "operator" : "OR", + "slop" : 0, + "prefix_length" : 0, + "max_expansions" : 25, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -590,15 +607,16 @@ void likeFunction_keywordField_generatesWildcardQuery() throws ExpressionNotAnal assertInstanceOf(WildcardQueryBuilder.class, result); assertEquals( """ - { - "wildcard" : { - "b.keyword" : { - "wildcard" : "*Hi*", - "case_insensitive" : true, - "boost" : 1.0 - } - } - }""", + { + "wildcard" : { + "b.keyword" : { + "wildcard" : "*Hi*", + "case_insensitive" : true, + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -642,54 +660,55 @@ void andOrNot_generatesCompoundQuery() throws ExpressionNotAnalyzableException { assertInstanceOf(BoolQueryBuilder.class, result); assertEquals( """ - { - "bool" : { - "must_not" : [ - { - "bool" : { - "must" : [ - { - "bool" : { - "should" : [ - { - "term" : { - "a" : { - "value" : 12, - "boost" : 1.0 - } - } - }, - { - "term" : { - "a" : { - "value" : 13, - "boost" : 1.0 - } - } + { + "bool" : { + "must_not" : [ + { + "bool" : { + "must" : [ + { + "bool" : { + "should" : [ + { + "term" : { + "a" : { + "value" : 12, + "boost" : 1.0 + } + } + }, + { + "term" : { + "a" : { + "value" : 13, + "boost" : 1.0 } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 - } - }, - { - "term" : { - "b.keyword" : { - "value" : "Hi", - "boost" : 1.0 } } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "term" : { + "b.keyword" : { + "value" : "Hi", + "boost" : 1.0 } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 + } } - } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } } - }""", + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -701,14 +720,15 @@ void equals_generatesTermQuery_TextWithKeyword() throws ExpressionNotAnalyzableE assertInstanceOf(TermQueryBuilder.class, result); 
assertEquals( """ - { - "term" : { - "b.keyword" : { - "value" : "Hi", - "boost" : 1.0 - } - } - }""", + { + "term" : { + "b.keyword" : { + "value" : "Hi", + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -766,14 +786,15 @@ void isTrue_predicate() throws ExpressionNotAnalyzableException { assertInstanceOf(TermQueryBuilder.class, result); assertEquals( """ - { - "term" : { - "b.keyword" : { - "value" : "Hi", - "boost" : 1.0 - } - } - }""", + { + "term" : { + "b.keyword" : { + "value" : "Hi", + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -828,22 +849,23 @@ void verify_partial_pushdown() throws ExpressionNotAnalyzableException { assertInstanceOf(BoolQueryBuilder.class, resultBuilder); assertEquals( """ + { + "bool" : { + "must" : [ { - "bool" : { - "must" : [ - { - "term" : { - "a" : { - "value" : 12, - "boost" : 1.0 - } - } - } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 + "term" : { + "a" : { + "value" : 12, + "boost" : 1.0 + } } - }""", + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, resultBuilder.toString()); List unAnalyzableNodes = result.getUnAnalyzableNodes(); @@ -869,22 +891,23 @@ void verify_partial_pushdown() throws ExpressionNotAnalyzableException { assertInstanceOf(BoolQueryBuilder.class, resultBuilder); assertEquals( """ - { - "bool" : { - "must" : [ - { - "term" : { - "a" : { - "value" : 12, - "boost" : 1.0 - } - } + { + "bool" : { + "must" : [ + { + "term" : { + "a" : { + "value" : 12, + "boost" : 1.0 } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 + } } - }""", + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, resultBuilder.toString()); } @@ -899,21 +922,22 @@ void multiMatchWithoutFields_generatesMultiMatchQuery() throws ExpressionNotAnal assertInstanceOf(MultiMatchQueryBuilder.class, result); assertEquals( """ - { - "multi_match" : { - "query" : "Hi", - "fields" : [ ], - "type" : "best_fields", - "operator" : "OR", - "slop" : 0, - "prefix_length" : 0, - "max_expansions" : 50, - "zero_terms_query" : "NONE", - "auto_generate_synonyms_phrase_query" : true, - "fuzzy_transpositions" : true, - "boost" : 1.0 - } - }""", + { + "multi_match" : { + "query" : "Hi", + "fields" : [ ], + "type" : "best_fields", + "operator" : "OR", + "slop" : 0, + "prefix_length" : 0, + "max_expansions" : 50, + "zero_terms_query" : "NONE", + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -930,19 +954,20 @@ void simpleQueryStringWithoutFields_generatesSimpleQueryStringQuery() assertInstanceOf(SimpleQueryStringBuilder.class, result); assertEquals( """ - { - "simple_query_string" : { - "query" : "Hi", - "flags" : -1, - "default_operator" : "or", - "analyze_wildcard" : false, - "auto_generate_synonyms_phrase_query" : true, - "fuzzy_prefix_length" : 0, - "fuzzy_max_expansions" : 50, - "fuzzy_transpositions" : true, - "boost" : 1.0 - } - }""", + { + "simple_query_string" : { + "query" : "Hi", + "flags" : -1, + "default_operator" : "or", + "analyze_wildcard" : false, + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_prefix_length" : 0, + "fuzzy_max_expansions" : 50, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -959,24 +984,25 @@ void queryStringWithoutFields_generatesQueryStringQuery() assertInstanceOf(QueryStringQueryBuilder.class, result); assertEquals( """ - { - "query_string" : { - "query" : "Hi", - "fields" : [ ], - "type" : "best_fields", - "default_operator" : "or", - 
"max_determinized_states" : 10000, - "enable_position_increments" : true, - "fuzziness" : "AUTO", - "fuzzy_prefix_length" : 0, - "fuzzy_max_expansions" : 50, - "phrase_slop" : 0, - "escape" : false, - "auto_generate_synonyms_phrase_query" : true, - "fuzzy_transpositions" : true, - "boost" : 1.0 - } - }""", + { + "query_string" : { + "query" : "Hi", + "fields" : [ ], + "type" : "best_fields", + "default_operator" : "or", + "max_determinized_states" : 10000, + "enable_position_increments" : true, + "fuzziness" : "AUTO", + "fuzzy_prefix_length" : 0, + "fuzzy_max_expansions" : 50, + "phrase_slop" : 0, + "escape" : false, + "auto_generate_synonyms_phrase_query" : true, + "fuzzy_transpositions" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -988,18 +1014,19 @@ void equals_generatesRangeQueryForDateTime() throws ExpressionNotAnalyzableExcep assertInstanceOf(RangeQueryBuilder.class, result); assertEquals( """ - { - "range" : { - "d" : { - "from" : "1987-02-03T04:34:56.000Z", - "to" : "1987-02-03T04:34:56.000Z", - "include_lower" : true, - "include_upper" : true, - "format" : "date_time", - "boost" : 1.0 - } - } - }""", + { + "range" : { + "d" : { + "from" : "1987-02-03T04:34:56.000Z", + "to" : "1987-02-03T04:34:56.000Z", + "include_lower" : true, + "include_upper" : true, + "format" : "date_time", + "boost" : 1.0 + } + } + }\ + """, result.toString()); } @@ -1011,38 +1038,39 @@ void notEquals_generatesBoolQueryForDateTime() throws ExpressionNotAnalyzableExc assertInstanceOf(BoolQueryBuilder.class, result); assertEquals( """ - { - "bool" : { - "should" : [ - { - "range" : { - "d" : { - "from" : "1987-02-03T04:34:56.000Z", - "to" : null, - "include_lower" : false, - "include_upper" : true, - "format" : "date_time", - "boost" : 1.0 - } - } - }, - { - "range" : { - "d" : { - "from" : null, - "to" : "1987-02-03T04:34:56.000Z", - "include_lower" : true, - "include_upper" : false, - "format" : "date_time", - "boost" : 1.0 - } - } + { + "bool" : { + "should" : [ + { + "range" : { + "d" : { + "from" : "1987-02-03T04:34:56.000Z", + "to" : null, + "include_lower" : false, + "include_upper" : true, + "format" : "date_time", + "boost" : 1.0 } - ], - "adjust_pure_negative" : true, - "boost" : 1.0 + } + }, + { + "range" : { + "d" : { + "from" : null, + "to" : "1987-02-03T04:34:56.000Z", + "include_lower" : true, + "include_upper" : false, + "format" : "date_time", + "boost" : 1.0 + } + } } - }""", + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, result.toString()); } @@ -1055,18 +1083,19 @@ void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzab assertInstanceOf(RangeQueryBuilder.class, result); assertEquals( """ - { - "range" : { - "d" : { - "from" : "1987-02-03T04:34:56.000Z", - "to" : null, - "include_lower" : true, - "include_upper" : true, - "format" : "date_time", - "boost" : 1.0 - } - } - }""", + { + "range" : { + "d" : { + "from" : "1987-02-03T04:34:56.000Z", + "to" : null, + "include_lower" : true, + "include_upper" : true, + "format" : "date_time", + "boost" : 1.0 + } + } + }\ + """, result.toString()); } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 2fd12db0296..310bb5e73c5 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -1742,7 +1742,8 @@ void cast_to_ip_in_filter(LiteralExpression expr) { "boost" : 1.0 } } - }""", + }\ + """, expr.valueOf().stringValue()); assertJsonEquals(json, buildQuery(DSL.equal(ref("ip_value", IP), DSL.castIp(expr)))); diff --git a/plugin/build.gradle b/plugin/build.gradle index 6d80972edd3..55276b49cc3 100644 --- a/plugin/build.gradle +++ b/plugin/build.gradle @@ -149,7 +149,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } @@ -284,7 +284,7 @@ afterEvaluate { version = "${project.version}" - "-SNAPSHOT" into '/usr/share/opensearch/plugins' - from(zipTree(bundlePlugin.archivePath)) { + from(zipTree(bundlePlugin.archiveFile)) { into opensearchplugin.name } diff --git a/ppl/build.gradle b/ppl/build.gradle index 3e244c6fa01..ef7973b1e37 100644 --- a/ppl/build.gradle +++ b/ppl/build.gradle @@ -81,7 +81,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 67403741a82..6678bf85d0e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -712,11 +712,11 @@ public void testLineCommentShouldPass() { new PPLSyntaxParser() .parse( """ - // test is a new line comment \ - search source=t a=1 b=2 // test is a line comment at the end of ppl command \ - | fields a,b // this is line comment inner ppl command\ - ////this is a new line comment - """)); + // test is a new line comment \ + search source=t a=1 b=2 // test is a line comment at the end of ppl command \ + | fields a,b // this is line comment inner ppl command\ + ////this is a new line comment + """)); } @Test @@ -727,20 +727,20 @@ public void testBlockCommentShouldPass() { new PPLSyntaxParser() .parse( """ - /* + /* + This is a\ + multiple\ + line\ + block\ + comment */\ + search /* block comment */ source=t /* block comment */ a=1 b=2 + |/* This is a\ multiple\ line\ block\ - comment */\ - search /* block comment */ source=t /* block comment */ a=1 b=2 - |/* - This is a\ - multiple\ - line\ - block\ - comment */ fields a,b /* block comment */ \ - """)); + comment */ fields a,b /* block comment */ \ + """)); } @Test diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java index 3b4c2b72a27..5c26d70335b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLInSubqueryTest.java @@ -245,16 +245,16 @@ public void testInSubqueryAsJoinFilter() { public void failWhenNumOfColumnsNotMatchOutputOfSubquery() { String less = """ - source=EMP | where (DEPTNO) in [ source=DEPT | fields DEPTNO, DNAME ] - | sort - EMPNO | fields EMPNO, ENAME - """; + 
source=EMP | where (DEPTNO) in [ source=DEPT | fields DEPTNO, DNAME ] + | sort - EMPNO | fields EMPNO, ENAME + """; assertThrows(SemanticCheckException.class, () -> getRelNode(less)); String more = """ - source=EMP | where (DEPTNO, ENAME) in [ source=DEPT | fields DEPTNO, DNAME, LOC ] - | sort - EMPNO | fields EMPNO, ENAME - """; + source=EMP | where (DEPTNO, ENAME) in [ source=DEPT | fields DEPTNO, DNAME, LOC ] + | sort - EMPNO | fields EMPNO, ENAME + """; assertThrows(SemanticCheckException.class, () -> getRelNode(more)); } diff --git a/prometheus/build.gradle b/prometheus/build.gradle index c35269ab452..c3f746e03d9 100644 --- a/prometheus/build.gradle +++ b/prometheus/build.gradle @@ -30,6 +30,7 @@ dependencies { testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: "${hamcrest_version}" testImplementation group: 'org.mockito', name: 'mockito-core', version: "${mockito_version}" testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: "${mockito_version}" + testRuntimeOnly('org.junit.platform:junit-platform-launcher') } test { diff --git a/protocol/build.gradle b/protocol/build.gradle index fe937648bbe..e6385ab69c2 100644 --- a/protocol/build.gradle +++ b/protocol/build.gradle @@ -42,6 +42,7 @@ dependencies { testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: "${hamcrest_version}" testImplementation group: 'org.mockito', name: 'mockito-core', version: "${mockito_version}" testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: "${mockito_version}" + testRuntimeOnly('org.junit.platform:junit-platform-launcher') } configurations.all { @@ -86,7 +87,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } diff --git a/sql/build.gradle b/sql/build.gradle index 2278496a46d..8391d1538d9 100644 --- a/sql/build.gradle +++ b/sql/build.gradle @@ -57,6 +57,7 @@ dependencies { testImplementation group: 'org.mockito', name: 'mockito-core', version: "${mockito_version}" testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: "${mockito_version}" testImplementation(testFixtures(project(":core"))) + testRuntimeOnly('org.junit.platform:junit-platform-launcher') } spotless { @@ -73,7 +74,7 @@ spotless { removeUnusedImports() trimTrailingWhitespace() endWithNewline() - googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') + googleJavaFormat('1.32.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } From cdb15fa03e19af673c72d6d348801481d69b327f Mon Sep 17 00:00:00 2001 From: Tomoyuki MORITA Date: Thu, 20 Nov 2025 16:46:43 -0800 Subject: [PATCH 71/99] Execute yamlRestTest in integration job (#4838) Signed-off-by: Tomoyuki Morita --- .github/workflows/sql-test-and-build-workflow.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/sql-test-and-build-workflow.yml b/.github/workflows/sql-test-and-build-workflow.yml index 0e7b4c228d2..7e1adf8d474 100644 --- a/.github/workflows/sql-test-and-build-workflow.yml +++ b/.github/workflows/sql-test-and-build-workflow.yml @@ -52,9 +52,9 @@ jobs: run: | chown -R 1000:1000 `pwd` if [ "${{ matrix.test-type }}" = "unit" ]; then - su `id -un 1000` -c "./gradlew 
--continue build -x integTest -x doctest"
+          su `id -un 1000` -c "./gradlew --continue build -x integTest -x yamlRestTest -x doctest"
         elif [ "${{ matrix.test-type }}" = "integration" ]; then
-          su `id -un 1000` -c "./gradlew --continue integTest"
+          su `id -un 1000` -c "./gradlew --continue integTest yamlRestTest"
         else
           su `id -un 1000` -c "./gradlew --continue doctest"
         fi
@@ -130,9 +130,9 @@ jobs:
     - name: Build and Test
       run: |
         if [ "${{ matrix.test-type }}" = "unit" ]; then
-          ./gradlew --continue build -x integTest -x doctest ${{ matrix.entry.os_build_args }}
+          ./gradlew --continue build -x integTest -x yamlRestTest -x doctest ${{ matrix.entry.os_build_args }}
         elif [ "${{ matrix.test-type }}" = "integration" ]; then
-          ./gradlew --continue integTest ${{ matrix.entry.os_build_args }}
+          ./gradlew --continue integTest yamlRestTest ${{ matrix.entry.os_build_args }}
         else
           ./gradlew --continue doctest ${{ matrix.entry.os_build_args }}
         fi

From 53d9299fdd95444e62ecc869c7aa0fd3f80606d0 Mon Sep 17 00:00:00 2001
From: Xinyuan Lu
Date: Fri, 21 Nov 2025 09:41:59 +0800
Subject: [PATCH 72/99] Fix search anonymizer only (#4783)

* fix anonymizer for search command

Signed-off-by: xinyual

* pushdown match when only one equal in search command

Signed-off-by: xinyual

* fix regex case

Signed-off-by: xinyual

* fix UT

Signed-off-by: xinyual

* fix UT

Signed-off-by: xinyual

* revert match change

Signed-off-by: xinyual

* fix UT by ignore the expression

Signed-off-by: xinyual

* remove useless change and resolve comment

Signed-off-by: xinyual

* remove useless change and resolve comment

Signed-off-by: xinyual

* add test cases for metadata and timestamp identifier

Signed-off-by: xinyual

* change name

Signed-off-by: xinyual

---------

Signed-off-by: xinyual
---
 .../org/opensearch/sql/ast/dsl/AstDSL.java    |  2 +-
 .../sql/ast/expression/SearchAnd.java         |  5 +++
 .../sql/ast/expression/SearchComparison.java  |  8 ++++
 .../sql/ast/expression/SearchExpression.java  |  7 ++++
 .../sql/ast/expression/SearchGroup.java       |  8 ++++
 .../sql/ast/expression/SearchIn.java          |  5 +++
 .../sql/ast/expression/SearchLiteral.java     |  5 +++
 .../sql/ast/expression/SearchNot.java         |  5 +++
 .../sql/ast/expression/SearchOr.java          |  5 +++
 .../org/opensearch/sql/ast/tree/Search.java   | 17 ++++++--
 .../sql/utils/QueryStringUtils.java           | 20 +++++++++
 .../explain_search_with_match_pushdown.json   |  6 +++
 .../opensearch/sql/ppl/parser/AstBuilder.java |  2 +-
 .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 16 ++++---
 .../ppl/utils/PPLQueryDataAnonymizerTest.java | 42 +++++++++++++++++--
 15 files changed, 134 insertions(+), 19 deletions(-)
 create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json

diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
index 93ad06011c0..bf54d2ffd89 100644
--- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
+++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
@@ -113,7 +113,7 @@ public UnresolvedPlan describe(String tableName) {
   }

   public static UnresolvedPlan search(UnresolvedPlan input, String queryString) {
-    return new Search(input, queryString);
+    return new Search(input, queryString, null);
   }

   public UnresolvedPlan subqueryAlias(UnresolvedPlan child, String alias) {
diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java
index fe7be0db13e..bdfbd9fda39 100644
---
a/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchAnd.java @@ -27,6 +27,11 @@ public String toQueryString() { return left.toQueryString() + " AND " + right.toQueryString(); } + @Override + public String toAnonymizedString() { + return left.toAnonymizedString() + " AND " + right.toAnonymizedString(); + } + @Override public List getChild() { return Arrays.asList(left, right); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java index be099059546..c429e6f66cc 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchComparison.java @@ -5,6 +5,8 @@ package org.opensearch.sql.ast.expression; +import static org.opensearch.sql.utils.QueryStringUtils.maskField; + import java.util.Arrays; import java.util.List; import lombok.EqualsAndHashCode; @@ -63,6 +65,12 @@ public String toQueryString() { } } + @Override + public String toAnonymizedString() { + String fieldName = QueryStringUtils.escapeFieldName(field.getField().toString()); + return maskField(fieldName) + " " + operator.symbol + " ***"; + } + @Override public List getChild() { return Arrays.asList(field, value); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java index 23bf806d230..b705909445f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchExpression.java @@ -17,6 +17,13 @@ public abstract class SearchExpression extends UnresolvedExpression { */ public abstract String toQueryString(); + /** + * Convert the search expression to anonymized string + * + * @return the anonymized string + */ + public abstract String toAnonymizedString(); + @Override public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitChildren(this, context); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java index 41b85f408ca..09197202dc0 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchGroup.java @@ -26,6 +26,14 @@ public String toQueryString() { return "(" + expression.toQueryString() + ")"; } + @Override + public String toAnonymizedString() { + if (expression instanceof SearchGroup) { + return expression.toAnonymizedString(); + } + return "(" + expression.toAnonymizedString() + ")"; + } + @Override public List getChild() { return Collections.singletonList(expression); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java index bdbdb712a9c..8291d130dff 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchIn.java @@ -33,6 +33,11 @@ public String toQueryString() { return fieldName + ":( " + valueList + " )"; } + @Override + public String toAnonymizedString() { + return "identifier IN ***"; + } + @Override public List getChild() { List children = new ArrayList<>(); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java 
b/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java index f27dad34bab..460615afa64 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchLiteral.java @@ -55,6 +55,11 @@ public String toQueryString() { return QueryStringUtils.escapeLuceneSpecialCharacters(text); } + @Override + public String toAnonymizedString() { + return "***"; + } + @Override public List getChild() { return Collections.singletonList(literal); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java index 3e5083b7678..b9ea7b416b4 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchNot.java @@ -26,6 +26,11 @@ public String toQueryString() { return "NOT(" + expression.toQueryString() + ")"; } + @Override + public String toAnonymizedString() { + return "NOT(" + expression.toAnonymizedString() + ")"; + } + @Override public List getChild() { return Collections.singletonList(expression); diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java b/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java index b5aa72807bd..1a9e95e89a2 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/SearchOr.java @@ -27,6 +27,11 @@ public String toQueryString() { return left.toQueryString() + " OR " + right.toQueryString(); } + @Override + public String toAnonymizedString() { + return left.toAnonymizedString() + " OR " + right.toAnonymizedString(); + } + @Override public List getChild() { return Arrays.asList(left, right); diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Search.java b/core/src/main/java/org/opensearch/sql/ast/tree/Search.java index ebc74192987..0a705f95c41 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Search.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Search.java @@ -7,11 +7,13 @@ import com.google.common.collect.ImmutableList; import java.util.List; +import javax.annotation.Nullable; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.SearchExpression; /** * Logical plan node for Search operation. 
Represents search expressions that get converted to @@ -19,12 +21,19 @@ */ @Getter @ToString -@EqualsAndHashCode(callSuper = false) +@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false) @RequiredArgsConstructor public class Search extends UnresolvedPlan { - private final UnresolvedPlan child; - private final String queryString; + @EqualsAndHashCode.Include private final UnresolvedPlan child; + @EqualsAndHashCode.Include private final String queryString; + + // Currently it's only for anonymizer + private final @Nullable SearchExpression originalExpression; + + public Search(UnresolvedPlan child, String queryString) { + this(child, queryString, null); + } @Override public List getChild() { @@ -38,6 +47,6 @@ public T accept(AbstractNodeVisitor nodeVisitor, C context) { @Override public UnresolvedPlan attach(UnresolvedPlan child) { - return new Search(child, queryString); + return new Search(child, queryString, originalExpression); } } diff --git a/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java b/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java index fd9c4b7d2e3..94c1bd9db05 100644 --- a/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/QueryStringUtils.java @@ -8,6 +8,26 @@ /** Utility class for query_string syntax operations. */ public class QueryStringUtils { + private static final String INTERNAL_TIMESTAMP = "@timestamp"; + + public static final String MASK_LITERAL = "***"; + + public static final String MASK_COLUMN = "identifier"; + + public static final String MASK_TIMESTAMP_COLUMN = "time_identifier"; + + public static final String MASK_METADATA_COLUMN = "meta_identifier"; + + public static String maskField(String fieldName) { + if (fieldName.equals(INTERNAL_TIMESTAMP)) { + return MASK_TIMESTAMP_COLUMN; + } + if (fieldName.startsWith("_")) { + return MASK_METADATA_COLUMN; + } + return MASK_COLUMN; + } + // For field names, we typically don't escape dots as they're used for nested fields // But we escape other special characters public static final String LUCENE_SPECIAL_CHARS = "+-&|!(){}[]^\"~:/"; diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json new file mode 100644 index 00000000000..fb6ef8e36ff --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_search_with_match_pushdown.json @@ -0,0 +1,6 @@ +{ + "calcite":{ + "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", + "physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->match(MAP('field_name', $3), MAP('value', '\"2016\\-12\\-08 00\\:00\\:00.000000000\"':VARCHAR)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"match\":{\"birthdate\":{\"query\":\"\\\"2016\\\\-12\\\\-08 
00\\\\:00\\\\:00.000000000\\\"\",\"operator\":\"OR\",\"prefix_length\":0,\"max_expansions\":50,\"fuzzy_transpositions\":true,\"lenient\":false,\"zero_terms_query\":\"NONE\",\"auto_generate_synonyms_phrase_query\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} \ No newline at end of file diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3ffff5f9442..cf674131d92 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -214,7 +214,7 @@ public UnresolvedPlan visitSearchFrom(SearchFromContext ctx) { // Create Search node with relation and query string Relation relation = (Relation) visitFromClause(ctx.fromClause()); - return new Search(relation, queryString); + return new Search(relation, queryString, combined); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 7e8dc16f4dd..e277dfacbc1 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -7,6 +7,9 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation; import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; +import static org.opensearch.sql.utils.QueryStringUtils.MASK_COLUMN; +import static org.opensearch.sql.utils.QueryStringUtils.MASK_LITERAL; +import static org.opensearch.sql.utils.QueryStringUtils.maskField; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; @@ -108,11 +111,7 @@ /** Utility class to mask sensitive information in incoming PPL queries. 
*/ public class PPLQueryDataAnonymizer extends AbstractNodeVisitor { - private static final String MASK_LITERAL = "***"; - - private static final String MASK_COLUMN = "identifier"; - - private static final String MASK_TABLE = "table"; + public static final String MASK_TABLE = "table"; private final AnonymizerExpressionAnalyzer expressionAnalyzer; private final Settings settings; @@ -252,9 +251,7 @@ public String visitTableFunction(TableFunction node, String context) { @Override public String visitSearch(Search node, String context) { String source = node.getChild().get(0).accept(this, context); - String queryString = node.getQueryString(); - String anonymized = queryString.replaceAll(":\\S+", ":" + MASK_LITERAL); - return StringUtils.format("%s %s", source, anonymized); + return StringUtils.format("%s %s", source, node.getOriginalExpression().toAnonymizedString()); } @Override @@ -918,7 +915,8 @@ public String visitIn(In node, String context) { @Override public String visitField(Field node, String context) { - return MASK_COLUMN; + String fieldName = node.getField().toString(); + return maskField(fieldName); } @Override diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index f205b9fe0cc..549fca03195 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -33,7 +33,7 @@ public class PPLQueryDataAnonymizerTest { @Test public void testSearchCommand() { - assertEquals("source=table a:***", anonymize("search source=t a=1")); + assertEquals("source=table identifier = ***", anonymize("search source=t a=1")); } @Test @@ -255,8 +255,8 @@ public void testReverseCommand() { @Test public void testTimechartCommand() { assertEquals( - "source=table | timechart limit=*** useother=*** count() by span(identifier, *** m)" - + " identifier", + "source=table | timechart limit=*** useother=*** count() by span(time_identifier, ***" + + " m) identifier", anonymize("source=t | timechart count() by host")); } @@ -388,6 +388,13 @@ public void testAndExpression() { anonymize("source=t | where a=1 and b=2")); } + @Test + public void testAndExpressionWithMetaData() { + assertEquals( + "source=table | where meta_identifier = *** and identifier = ***", + anonymize("source=t | where _id=1 and b=2")); + } + @Test public void testOrExpression() { assertEquals( @@ -879,10 +886,37 @@ private String anonymizeStatement(String query, boolean isExplain) { @Test public void testSearchWithAbsoluteTimeRange() { assertEquals( - "source=table (@timestamp:*** AND (@timestamp:***", + "source=table (time_identifier >= ***) AND (time_identifier <= ***)", anonymize("search source=t earliest='2012-12-10 15:00:00' latest=now")); } + @Test + public void testSearchWithIn() { + assertEquals("source=table identifier IN ***", anonymize("search source=t balance in (2000)")); + } + + @Test + public void testSearchWithNot() { + assertEquals( + "source=table NOT(identifier = ***)", anonymize("search NOT balance=2000 source=t")); + } + + @Test + public void testSearchWithGroup() { + assertEquals( + "source=table ((identifier = *** OR identifier = ***) AND identifier > ***)", + anonymize( + "search (severityText=\"ERROR\" OR severityText=\"WARN\") AND severityNumber>10" + + " source=t")); + } + + @Test + public void testSearchWithOr() { + assertEquals( + "source=table (time_identifier >= *** OR 
time_identifier <= ***)", + anonymize("search source=t earliest='2012-12-10 15:00:00' or latest=now")); + } + @Test public void testSpath() { assertEquals( From dc98a909a4ce72febea001eaedc71fef0a5ce4dd Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 24 Nov 2025 12:57:17 +0800 Subject: [PATCH 73/99] Fix the flaky CalcitePPLTpchIT (#4846) Signed-off-by: Lantao Jin --- .../sql/calcite/tpch/CalcitePPLTpchIT.java | 13 +++++-------- .../opensearch/sql/legacy/SQLIntegTestCase.java | 13 +++++++++++++ .../java/org/opensearch/sql/legacy/TestUtils.java | 15 +++++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-)
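Aside (not part of the diff): loadIndex() could return once the index exists even though the freshly loaded documents were not yet searchable, so a test could run against an effectively empty index. Below is a standalone sketch of the same wait-for-docs polling; the localhost URL, the JDK HttpClient, and the naive JSON parsing are illustrative stand-ins for the test framework's RestClient helpers.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class WaitForDocsSketch {
  // Ask the real OpenSearch _count API how many docs the index holds.
  static int getDocCount(HttpClient client, String indexName) throws Exception {
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:9200/" + indexName + "/_count"))
        .GET()
        .build();
    String body = client.send(request, HttpResponse.BodyHandlers.ofString()).body();
    // Response looks like {"count":42,"_shards":{...}}; extract the number naively.
    String tail = body.substring(body.indexOf("\"count\":") + 8);
    return Integer.parseInt(tail.substring(0, tail.indexOf(',')));
  }

  // Poll until at least one doc is visible, or give up after 3 x 500ms,
  // matching the countdown added to loadIndex() below.
  static void waitForDocs(HttpClient client, String indexName) throws Exception {
    int countDown = 3;
    while (countDown != 0 && getDocCount(client, indexName) == 0) {
      Thread.sleep(500);
      countDown--;
    }
  }

  public static void main(String[] args) throws Exception {
    waitForDocs(HttpClient.newHttpClient(), "opensearch-sql_test_index_bank");
  }
}

Three 500ms retries bound the wait at roughly 1.5 seconds, so a genuinely empty index still fails fast instead of hanging the suite.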
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java index 18e7246cc63..d83140e0dd5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchIT.java @@ -17,7 +17,6 @@ import java.util.Locale; import org.json.JSONObject; import org.junit.Assume; -import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.ppl.PPLIntegTestCase; import org.opensearch.sql.util.Retry; @@ -143,9 +142,7 @@ public void testQ3() throws IOException { rows(4423, 3055.9365, "1995-02-17 00:00:00", 0)); } - // TODO: Aggregation push down has a hard-coded limit of 1000 buckets for output, so this query - // will not return the correct results with aggregation push down and it's unstable - @Ignore + @Test public void testQ4() throws IOException { String ppl = sanitize(loadFromFile("tpch/queries/q4.ppl")); JSONObject actual = executeQuery(ppl); @@ -153,11 +150,11 @@ public void testQ4() throws IOException { actual, schema("o_orderpriority", "string"), schema("order_count", "bigint")); verifyDataRows( actual, - rows("1-URGENT", 7), + rows("1-URGENT", 9), rows("2-HIGH", 7), - rows("3-MEDIUM", 4), - rows("4-NOT SPECIFIED", 7), - rows("5-LOW", 10)); + rows("3-MEDIUM", 9), + rows("4-NOT SPECIFIED", 8), + rows("5-LOW", 12)); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 47632dbc942..50ee11b765a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -210,6 +210,19 @@ protected synchronized void loadIndex(Index index, RestClient client) throws IOE createIndexByRestClient(client, indexName, mapping); loadDataByRestClient(client, indexName, dataSet); } + // loadIndex() could directly return when isIndexExist()=true, + // e.g. the index is created in the cluster but data hasn't been flushed.
+ // We block loadIndex() until data is loaded to resolve + // https://github.com/opensearch-project/sql/issues/4261 + int countDown = 3; // 1500ms timeout + while (countDown != 0 && getDocCount(client, indexName) == 0) { + try { + Thread.sleep(500); + countDown--; + } catch (InterruptedException e) { + throw new IOException(e); + } + } } protected synchronized void loadIndex(Index index) throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index a94e89ec0e6..2ac1763836e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -104,6 +104,21 @@ public static void loadDataByRestClient( performRequest(client, request); } + /** + * Return how many docs are in the index + * + * @param client client connection + * @param indexName index name + * @return doc count of the index + * @throws IOException + */ + public static int getDocCount(RestClient client, String indexName) throws IOException { + Request request = new Request("GET", "/" + indexName + "/_count"); + Response response = performRequest(client, request); + JSONObject jsonObject = new JSONObject(getResponseBody(response)); + return jsonObject.getInt("count"); + } + /** * Perform a request by REST client. * From dee22bb24e88f81bd660e2b03458d0bb05253d63 Mon Sep 17 00:00:00 2001 From: qianheng Date: Mon, 24 Nov 2025 15:07:41 +0800 Subject: [PATCH 74/99] Remove redundant push-down-filters derived for bucket-non-null agg (#4843) * Remove redundant push-down-filters derived for bucket-non-null aggregation Signed-off-by: Heng Qian * Address comment Signed-off-by: Heng Qian * Fix name typo Signed-off-by: Heng Qian --------- Signed-off-by: Heng Qian --- ..._composite_autodate_range_metric_push.yaml | 2 +- ...ange_metric_sort_agg_measure_not_push.yaml | 2 +- ...te_autodate_sort_agg_measure_not_push.yaml | 2 +- ...osite_range_sort_agg_measure_not_push.yaml | 2 +- .../calcite/chart_with_timestamp_span.yaml | 2 +- .../calcite/clickbench/q10.yaml | 2 +- .../calcite/clickbench/q17.yaml | 2 +- .../calcite/clickbench/q18.yaml | 2 +- .../calcite/clickbench/q19.yaml | 2 +- .../calcite/clickbench/q29.yaml | 2 +- .../calcite/clickbench/q31.yaml | 2 +- .../calcite/clickbench/q32.yaml | 2 +- .../calcite/clickbench/q33.yaml | 2 +- .../calcite/clickbench/q40.yaml | 4 +- .../calcite/clickbench/q41.yaml | 2 +- .../calcite/clickbench/q42.yaml | 2 +- .../calcite/date_histogram_hourly_agg.yaml | 2 +- .../explain_agg_script_udt_arg_push.yaml | 2 +- .../calcite/explain_agg_sort_on_measure2.yaml | 2 +- .../calcite/explain_agg_sort_on_measure3.yaml | 2 +- .../calcite/explain_agg_sort_on_measure4.yaml | 2 +- .../explain_agg_sort_on_measure_complex1.yaml | 2 +- .../explain_agg_sort_on_measure_complex2.yaml | 2 +- ...t_on_measure_multi_buckets_not_pushed.yaml | 2 +- ...gg_with_sort_on_one_measure_not_push1.yaml | 2 +- ...gg_with_sort_on_one_measure_not_push2.yaml | 2 +- .../explain_stats_bins_on_time_and_term2.yaml | 2 +- ...ain_stats_by_span_non_bucket_nullable.json | 2 +- .../calcite/explain_stats_by_timespan.json | 2 +- .../calcite/explain_stats_by_timespan2.json | 2 +- .../planner/rules/AggregateIndexScanRule.java | 155 +++++++++++++++--- .../planner/rules/OpenSearchIndexRules.java | 3 + 32 files changed, 164 insertions(+), 56 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml
b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml index dccce23e18b..14ee0aff97b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(@timestamp=[$0], category=[$1], value=[$2], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], timestamp=[WIDTH_BUCKET($3, 3, -(MAX($3) OVER (), MIN($3) OVER ()), MAX($3) OVER ())], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'great':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2, 3},avg(value)=AVG($1),count()=COUNT()), PROJECT->[avg(value), count(), timestamp, value_range, category], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg(value)=AVG($3),count()=COUNT()), PROJECT->[avg(value), count(), timestamp, value_range, category], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_sort_agg_measure_not_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_sort_agg_measure_not_push.yaml index 90e83946c38..ade5ec642b6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_sort_agg_measure_not_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_sort_agg_measure_not_push.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2, 3},avg(value)=AVG($1),cnt=COUNT()), PROJECT->[avg(value), cnt, timestamp, value_range, category]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg(value)=AVG($3),cnt=COUNT()), PROJECT->[avg(value), cnt, timestamp, value_range, category]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_sort_agg_measure_not_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_sort_agg_measure_not_push.yaml index e3d4d9fba4d..48689ff1503 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_sort_agg_measure_not_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_sort_agg_measure_not_push.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1),cnt=COUNT()), PROJECT->[avg(value), cnt, timestamp, category]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(value)=AVG($2),cnt=COUNT()), PROJECT->[avg(value), cnt, timestamp, category]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_sort_agg_measure_not_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_sort_agg_measure_not_push.yaml index 19846e9910b..10e839ce22c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_sort_agg_measure_not_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_sort_agg_measure_not_push.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$1], dir0=[ASC-nulls-first]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1),cnt=COUNT()), PROJECT->[avg(value), cnt, value_range, category]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(value)=AVG($2),cnt=COUNT()), PROJECT->[avg(value), cnt, value_range, category]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml index cdce175e83a..a07c92033d0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[PROJECT->[@timestamp], FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml index 4e6c0e1f59f..7fc8c3d5a7f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q10.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($68)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum(AdvEngineID)=SUM($1),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml index 6b3ab9ed1fe..2f2b2f0a897 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, UserID], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), 
SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"UserID","boost":1.0}},{"exists":{"field":"SearchPhrase","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","UserID"],"excludes":[]},"aggregations":{"UserID|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml index c940061f690..59742c27ae9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"SearchPhrase":{"terms":{"field":"SearchPhrase","missing_bucket":false,"order":"asc"}}},{"UserID":{"terms":{"field":"UserID","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"UserID":{"terms":{"field":"UserID","missing_bucket":false,"order":"asc"}}},{"SearchPhrase":{"terms":{"field":"SearchPhrase","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml index e0c99960471..54d8b79cbc3 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], m=[EXTRACT('minute':VARCHAR, $17)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[UserID, m, SearchPhrase, count()], SORT_AGG_METRICS->[3 DESC LAST], PROJECT->[count(), UserID, m, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"SearchPhrase|UserID|m":{"multi_terms":{"terms":[{"field":"SearchPhrase"},{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), UserID, m, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml index 4ec2ad5e6c8..97fdce0a1c0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml @@ -15,4 +15,4 @@ calcite: EnumerableLimit(fetch=[25]) EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[100000], expr#5=[>($t1, $t4)], proj#0..3=[{exprs}], $condition=[$t5]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[Referer], SCRIPT->AND(<>($0, ''), IS NOT NULL(REGEXP_REPLACE($0, '^https?://(?:www\.)?([^/]+)/.*$':VARCHAR, '$1'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($2),c=COUNT(),min(Referer)=MIN($1)), PROJECT->[l, c, min(Referer), k]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"Referer","boost":1.0}}],"must_not":[{"term":{"Referer":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDEnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJUyBOT1QgTlVMTCIsCiAgICAia2luZCI6ICJJU19OT1RfTlVMTCIsCiAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMgogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["Referer","^https?://(?:www\\.)?([^/]+)/.*$","$1"]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["Referer"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"k":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDIKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,2,2],"DIGESTS":["Referer","^https?://(?:www\\.)?([^/]+)/.*$","$1"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["Referer"]}}}},"min(Referer)":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"Referer"}],"sort":[{"Referer":{"order":"asc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[Referer], FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($2),c=COUNT(),min(Referer)=MIN($1)), PROJECT->[l, c, min(Referer), k]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"Referer","boost":1.0}}],"must_not":[{"term":{"Referer":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["Referer"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"k":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDIKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["Referer","^https?://(?:www\\.)?([^/]+)/.*$","$1"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["Referer"]}}}},"min(Referer)":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"Referer"}],"sort":[{"Referer":{"order":"asc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml index 16e58d05b48..0f9c18436cd 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml index 2c78447a13d..6016bd287c6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($63, '')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - 
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER-><>($1, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml index 964dcece0f6..c143a17262a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q33.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($41), IS NOT NULL($76))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),sum(IsRefresh)=SUM($1),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml index 00d724db076..b2ae45002ed 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml @@ -10,7 +10,7 @@ calcite: LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - EnumerableCalc(expr#0..5=[{inputs}], PageViews=[$t5], TraficSourceID=[$t0], SearchEngineID=[$t2], AdvEngineID=[$t1], Src=[$t3], Dst=[$t4]) + EnumerableCalc(expr#0..5=[{inputs}], PageViews=[$t5], TraficSourceID=[$t0], SearchEngineID=[$t1], AdvEngineID=[$t2], Src=[$t3], Dst=[$t4]) EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[1000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"TraficSourceID|AdvEngineID|SearchEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"AdvEngineID"},{"field":"SearchEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGCnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAib3AiOiB7CiAgICAgICAgICAgICJuYW1lIjogIj0iLAogICAgICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgICAgIH0sCiAgICAgICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlNNQUxMSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 
00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"TraficSourceID|SearchEngineID|AdvEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"SearchEngineID"},{"field":"AdvEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGCnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAib3AiOiB7CiAgICAgICAgICAgICJuYW1lIjogIj0iLAogICAgICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgICAgIH0sCiAgICAgICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlNNQUxMSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml index 2670e3a77f4..9bb4ac10e86 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[100], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]; NULL AS FALSE]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"bool":{"must":[{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"exists":{"field":"EventDate","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml index a4078d5f8c7..9f785bf7cc9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 
00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/date_histogram_hourly_agg.yaml index 06361ea27e8..a97ca073a21 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/date_histogram_hourly_agg.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($17)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml index ae68535cb0b..483b5d2dfa0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], t=[DATE_ADD($3, 1:INTERVAL DAY)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[SCRIPT->IS NOT NULL(DATE_ADD($3, 1:INTERVAL DAY)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDs3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJJUyBOT1QgTlVMTCIsCiAgICAia2luZCI6ICJJU19OT1RfTlVMTCIsCiAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJEQVRFX0FERCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0],"DIGESTS":["birthdate"]}},"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFmXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTUEFOIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJEQVRFX0FERCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAxCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAibnVsbGFibGUiOiB0cnVlLAogICAgInByZWNpc2lvbiI6IC0xCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["birthdate",1,"d"]}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFmXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTUEFOIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJEQVRFX0FERCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAxCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAibnVsbGFibGUiOiB0cnVlLAogICAgInByZWNpc2lvbiI6IC0xCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["birthdate",1,"d"]}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml index 1808eba1f08..734e6ea1571 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure2.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum=SUM($1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"sum":"desc"},{"_key":"asc"}]},"aggregations":{"sum":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml index a40c5cec466..44393bfce83 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[cnt, span(birthdate,1d)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"birthdate","boost":1.0}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"cnt":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"cnt":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[cnt, span(birthdate,1d)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"cnt":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"cnt":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml index 74ff751bcef..57132615c41 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure4.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance, age], FILTER->IS NOT NULL($1), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), SORT_AGG_METRICS->[1 
DESC LAST], PROJECT->[sum(balance), span(age,5)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"age","boost":1.0}},"_source":{"includes":["balance","age"],"excludes":[]},"aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(balance)=SUM($0)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[sum(balance), span(age,5)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(age,5)":{"histogram":{"field":"age","interval":5.0,"offset":0.0,"order":[{"sum(balance)":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml index e75e44a129d..6a1e114acc5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={2},sum(balance)=SUM($0),c=COUNT(),dc(employer)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(balance), c, dc(employer), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum(balance)=SUM($1),c=COUNT(),dc(employer)=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(balance), c, dc(employer), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"dc(employer)":{"cardinality":{"field":"employer.keyword"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml index 0bc8e26bdd2..211aa979ce0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_complex2.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 3},sum(balance)=SUM($0),count()=COUNT(),d=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},sum(balance)=SUM($2),count()=COUNT(),d=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[4 DESC LAST], PROJECT->[sum(balance), count(), d, gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|new_state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"d":"desc"},{"_key":"asc"}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"d":{"cardinality":{"field":"employer.keyword"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml index 4ecd0e026bb..7532d318bb2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml @@ -10,4 +10,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], dir0=[ASC-nulls-first]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, state, age], FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),s=SUM($1)), PROJECT->[c, s, span(age,5), state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"age","boost":1.0}},{"exists":{"field":"state","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","state","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}},{"span(age,5)":{"histogram":{"field":"age","missing_bucket":false,"order":"asc","interval":5.0}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},c=COUNT(),s=SUM($1)), PROJECT->[c, s, span(age,5), state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}},{"span(age,5)":{"histogram":{"field":"age","missing_bucket":false,"order":"asc","interval":5.0}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml index 8ac888eab4a..2f63567329f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml @@ -10,4 +10,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$2], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),s=SUM($0)), PROJECT->[c, s, state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline 
at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),s=SUM($1)), PROJECT->[c, s, state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push2.yaml index d1651f464a6..4177d6616be 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_multiple_agg_with_sort_on_one_measure_not_push2.yaml @@ -10,4 +10,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),s=SUM($0)), PROJECT->[c, s, state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),s=SUM($1)), PROJECT->[c, s, state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"s":{"sum":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml index 1dc48f5a550..09e016d8e9e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(environment=[$0], status_code=[$2], service=[$3], host=[$4], memory_usage=[$5], response_time=[$6], cpu_usage=[$7], region=[$8], bytes_sent=[$9], _id=[$10], _index=[$11], _score=[$12], _maxscore=[$13], _sort=[$14], _routing=[$15], @timestamp=[WIDTH_BUCKET($1, 3, -(MAX($1) OVER (), MIN($1) OVER ()), MAX($1) OVER ())]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 2},avg(cpu_usage)=AVG($0)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(cpu_usage)=AVG($2)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json index 5a8d7cb07c4..977fe96090c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(age,10)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(age,10)=[SPAN($10, 10, null:NULL)])\n LogicalFilter(condition=[IS NOT NULL($10)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age], FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(age,10)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"age\",\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"missing_bucket\":false,\"order\":\"asc\",\"interval\":10.0}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(age,10)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"missing_bucket\":false,\"order\":\"asc\",\"interval\":10.0}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json index dcd2cd5e0b7..f84aa0cb018 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1m)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"birthdate\",\"boost\":1.0}},\"_source\":{\"includes\":[\"birthdate\"],\"excludes\":[]},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json index a310e0b460e..036547978b1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1M)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"birthdate\",\"boost\":1.0}},\"_source\":{\"includes\":[\"birthdate\"],\"excludes\":[]},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\":\"1M\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\":\"1M\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java index 77107b0c8e0..d87611f213b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java @@ -5,26 +5,35 @@ package org.opensearch.sql.opensearch.planner.rules; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.isTimeBasedType; import static org.opensearch.sql.expression.function.PPLBuiltinOperators.WIDTH_BUCKET; import java.util.List; import java.util.function.Function; import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexSlot; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.tools.RelBuilder; import org.immutables.value.Value; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; +import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.udf.binning.WidthBucketFunction; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -40,23 +49,46 @@ protected AggregateIndexScanRule(Config config) { @Override public void onMatch(RelOptRuleCall call) { - if (call.rels.length == 4) { + if (call.rels.length == 5) { + final LogicalAggregate aggregate = call.rel(0); + final LogicalProject topProject = call.rel(1); + final LogicalFilter 
filter = call.rel(2); + final LogicalProject bottomProject = call.rel(3); + final CalciteLogicalIndexScan scan = call.rel(4); + boolean ignoreNullBucket = Config.aggIgnoreNullBucket.test(aggregate); + List groupRefList = + aggregate.getGroupSet().asList().stream() + .map(topProject.getProjects()::get) + .filter(rex -> ignoreNullBucket || isTimeSpan(rex)) + .flatMap(expr -> PlanUtils.getInputRefs(expr).stream()) + .map(RexSlot::getIndex) + .toList(); + if (isNotNullDerivedFromAgg(filter, groupRefList)) { + final List newProjects = + RelOptUtil.pushPastProjectUnlessBloat( + topProject.getProjects(), bottomProject, RelOptUtil.DEFAULT_BLOAT); + if (newProjects != null) { + // Replace the two projects with a combined projection + RelBuilder relBuilder = call.builder(); + relBuilder.push(scan); + relBuilder.project(newProjects, topProject.getRowType().getFieldNames()); + RelNode node = relBuilder.build(); + if (node instanceof LogicalProject newProject) { + apply(call, aggregate, newProject, scan); + } else if (node.equals(scan)) { + // It means no project is needed + apply(call, aggregate, null, scan); + } + // Otherwise do nothing; no transform is applied + } + } + } else if (call.rels.length == 4) { final LogicalAggregate aggregate = call.rel(0); final LogicalFilter filter = call.rel(1); final LogicalProject project = call.rel(2); final CalciteLogicalIndexScan scan = call.rel(3); - List groupSet = aggregate.getGroupSet().asList(); - RexNode condition = filter.getCondition(); - Function isNotNullFromAgg = - rex -> - rex instanceof RexCall rexCall - && rexCall.getOperator() == SqlStdOperatorTable.IS_NOT_NULL - && rexCall.getOperands().get(0) instanceof RexInputRef ref - && groupSet.contains(ref.getIndex()); - if (isNotNullFromAgg.apply(condition) - || (condition instanceof RexCall rexCall - && rexCall.getOperator() == SqlStdOperatorTable.AND - && rexCall.getOperands().stream().allMatch(isNotNullFromAgg::apply))) { + List groupList = aggregate.getGroupSet().asList(); + if (isNotNullDerivedFromAgg(filter, groupList)) { // Try to do the aggregate push down and ignore the filter if the filter sources from the // aggregate's hint. 
See{@link CalciteRelNodeVisitor::visitAggregation} apply(call, aggregate, project, scan); @@ -79,6 +111,30 @@ public void onMatch(RelOptRuleCall call) { } } + private boolean isTimeSpan(RexNode rex) { + return rex instanceof RexCall rexCall + && rexCall.getKind() == SqlKind.OTHER_FUNCTION + && rexCall.getOperator().getName().equalsIgnoreCase(BuiltinFunctionName.SPAN.name()) + && rexCall.getOperands().size() == 3 + && rexCall.getOperands().get(2) instanceof RexLiteral unitLiteral + && unitLiteral.getTypeName() != SqlTypeName.NULL; + } + + private boolean isNotNullDerivedFromAgg(LogicalFilter filter, List groupRefList) { + RexNode condition = filter.getCondition(); + Function isNotNullFromAgg = + rex -> + rex instanceof RexCall rexCall + && rexCall.isA(SqlKind.IS_NOT_NULL) + && rexCall.getOperands().get(0) instanceof RexInputRef ref + && groupRefList.contains(ref.getIndex()); + + return isNotNullFromAgg.apply(condition) + || (condition instanceof RexCall rexCall + && rexCall.getOperator() == SqlStdOperatorTable.AND + && rexCall.getOperands().stream().allMatch(isNotNullFromAgg::apply)); + } + protected void apply( RelOptRuleCall call, LogicalAggregate aggregate, @@ -143,8 +199,21 @@ public interface Config extends OpenSearchRuleConfig { Predicate.not(AbstractCalciteIndexScan::isLimitPushed) .and(AbstractCalciteIndexScan::noAggregatePushed)) .noInputs())); - // TODO: No need this rule once https://github.com/opensearch-project/sql/issues/4403 is - // addressed + Predicate aggIgnoreNullBucket = + agg -> + agg.getHints().stream() + .anyMatch( + hint -> + hint.hintName.equals("stats_args") + && hint.kvOptions.get(Argument.BUCKET_NULLABLE).equals("false")); + Predicate maybeTimeSpanAgg = + agg -> + agg.getGroupSet().stream() + .allMatch( + group -> + isTimeBasedType( + agg.getInput().getRowType().getFieldList().get(group).getType())); + Config BUCKET_NON_NULL_AGG = ImmutableAggregateIndexScanRule.Config.builder() .build() @@ -152,15 +221,7 @@ public interface Config extends OpenSearchRuleConfig { .withOperandSupplier( b0 -> b0.operand(LogicalAggregate.class) - .predicate( - agg -> - agg.getHints().stream() - .anyMatch( - hint -> - hint.hintName.equals("stats_args") - && hint.kvOptions - .get(Argument.BUCKET_NULLABLE) - .equals("false"))) + .predicate(aggIgnoreNullBucket) .oneInput( b1 -> b1.operand(LogicalFilter.class) @@ -190,6 +251,50 @@ public interface Config extends OpenSearchRuleConfig { ::noAggregatePushed)) .noInputs())))); + Config BUCKET_NON_NULL_AGG_WITH_UDF = + ImmutableAggregateIndexScanRule.Config.builder() + .build() + .withDescription("Agg-Project-Filter-Project-TableScan") + .withOperandSupplier( + b0 -> + b0.operand(LogicalAggregate.class) + .predicate(aggIgnoreNullBucket.or(maybeTimeSpanAgg)) + .oneInput( + b1 -> + b1.operand(LogicalProject.class) + .predicate( + Predicate.not(PlanUtils::containsRexOver) + .and(PlanUtils::distinctProjectList)) + .oneInput( + b2 -> + b2.operand(LogicalFilter.class) + .predicate(Config::mayBeFilterFromBucketNonNull) + .oneInput( + b3 -> + b3.operand(LogicalProject.class) + .predicate( + Predicate.not( + PlanUtils::containsRexOver) + .and( + PlanUtils + ::distinctProjectList) + .or( + Config + ::containsWidthBucketFuncOnDate)) + .oneInput( + b4 -> + b4.operand( + CalciteLogicalIndexScan + .class) + .predicate( + Predicate.not( + AbstractCalciteIndexScan + ::isLimitPushed) + .and( + AbstractCalciteIndexScan + ::noAggregatePushed)) + .noInputs()))))); + @Override default AggregateIndexScanRule toRule() { return new AggregateIndexScanRule(this); @@ 
-206,7 +311,7 @@ static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) { private static boolean isNotNullOnRef(RexNode rex) { return rex instanceof RexCall rexCall - && rexCall.getOperator().equals(SqlStdOperatorTable.IS_NOT_NULL) + && rexCall.isA(SqlKind.IS_NOT_NULL) && rexCall.getOperands().get(0) instanceof RexInputRef; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index 42262097333..c991b1964e9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -21,6 +21,8 @@ public class OpenSearchIndexRules { // TODO: No need this rule once https://github.com/opensearch-project/sql/issues/4403 is addressed private static final AggregateIndexScanRule BUCKET_NON_NULL_AGG_INDEX_SCAN = AggregateIndexScanRule.Config.BUCKET_NON_NULL_AGG.toRule(); + private static final AggregateIndexScanRule BUCKET_NON_NULL_AGG_WITH_UDF_INDEX_SCAN = + AggregateIndexScanRule.Config.BUCKET_NON_NULL_AGG_WITH_UDF.toRule(); private static final LimitIndexScanRule LIMIT_INDEX_SCAN = LimitIndexScanRule.Config.DEFAULT.toRule(); private static final SortIndexScanRule SORT_INDEX_SCAN = @@ -49,6 +51,7 @@ public class OpenSearchIndexRules { AGGREGATE_INDEX_SCAN, COUNT_STAR_INDEX_SCAN, BUCKET_NON_NULL_AGG_INDEX_SCAN, + BUCKET_NON_NULL_AGG_WITH_UDF_INDEX_SCAN, LIMIT_INDEX_SCAN, SORT_INDEX_SCAN, // TODO enable if https://github.com/opensearch-project/OpenSearch/issues/3725 resolved From 18d506af963405f9c5d1a30369bbcc057701af31 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 24 Nov 2025 17:02:21 +0800 Subject: [PATCH 75/99] Grouping key field type can only be overwritten when the ExprCoreType are different (#4850) Signed-off-by: Lantao Jin --- .../rest-api-spec/test/issues/4845.yml | 49 +++++++++++++++++++ .../value/OpenSearchExprValueFactory.java | 13 +++-- 2 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4845.yml diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4845.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4845.yml new file mode 100644 index 00000000000..ba1ccd1256b --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4845.yml @@ -0,0 +1,49 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: test + body: + mappings: + properties: + "EventDate": + type: date + format: yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"EventDate": "2013-07-15 10:47:34"}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"handle custom format field with pushdown": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test | stats count() by EventDate + + - match: { total: 1 } + - match: {"schema": [{"name": "count()", "type": "bigint"},{"name": "EventDate", "type": "timestamp"}]} + - match: {"datarows": [[1, "2013-07-15 10:47:34"]]} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 85093ba39ab..2aca2ad334e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -79,13 +79,20 @@ public class OpenSearchExprValueFactory { private final boolean fieldTypeTolerance; /** - * Extend existing mapping by new data without overwrite. Called from aggregation only {@see - * AggregationQueryBuilder#buildTypeMapping}. + * Extend the existing mapping with new data. An entry is overwritten only when the two + * ExprCoreTypes differ. Called from aggregation only {@link AggregationQueryBuilder#buildTypeMapping}. * * @param typeMapping A data type mapping produced by aggregation. */ public void extendTypeMapping(Map<String, OpenSearchDataType> typeMapping) { - this.typeMapping.putAll(typeMapping); + typeMapping.forEach( + (groupKey, extendedType) -> { + OpenSearchDataType existedType = this.typeMapping.get(groupKey); + if (existedType == null + || !existedType.getExprCoreType().equals(extendedType.getExprCoreType())) { + this.typeMapping.put(groupKey, extendedType); + } + }); } @Getter @Setter private OpenSearchAggregationResponseParser parser; From a85c3433bb6b7cdebad8d2a5a5470ed0c433a679 Mon Sep 17 00:00:00 2001 From: Kai Huang <105710027+ahkcs@users.noreply.github.com> Date: Mon, 24 Nov 2025 10:47:51 -0800 Subject: [PATCH 76/99] Support `mvdedup` eval function (#4828) * Support eval function Signed-off-by: Kai Huang * Updates Signed-off-by: Kai Huang * update javadoc Signed-off-by: Kai Huang * Update to use ARRAY_DISTINCT Signed-off-by: Kai Huang --------- Signed-off-by: Kai Huang --- .../function/BuiltinFunctionName.java | 1 + .../expression/function/PPLFuncImpTable.java | 2 + docs/user/ppl/functions/collection.rst | 38 +++++++++ .../remote/CalciteArrayFunctionIT.java | 78 +++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../calcite/CalcitePPLArrayFunctionTest.java | 76 ++++++++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 9 +++ 8 files changed, 206 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 656bc018e50..87b0d9e69a7 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -75,6 +75,7 @@ public enum BuiltinFunctionName { MVAPPEND(FunctionName.of("mvappend")), MVJOIN(FunctionName.of("mvjoin")), MVINDEX(FunctionName.of("mvindex")), + MVDEDUP(FunctionName.of("mvdedup")), FORALL(FunctionName.of("forall")), EXISTS(FunctionName.of("exists")), FILTER(FunctionName.of("filter")), diff --git 
a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 06162a86813..116f250d379 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -150,6 +150,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVAPPEND; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVDEDUP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVINDEX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT; @@ -991,6 +992,7 @@ void populate() { registerOperator(ARRAY, PPLBuiltinOperators.ARRAY); registerOperator(MVAPPEND, PPLBuiltinOperators.MVAPPEND); + registerOperator(MVDEDUP, SqlLibraryOperators.ARRAY_DISTINCT); registerOperator(MAP_APPEND, PPLBuiltinOperators.MAP_APPEND); registerOperator(MAP_CONCAT, SqlLibraryOperators.MAP_CONCAT); registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE); diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst index 5c2b7c30f74..34c02074641 100644 --- a/docs/user/ppl/functions/collection.rst +++ b/docs/user/ppl/functions/collection.rst @@ -302,6 +302,44 @@ Example:: | [1,text,2.5] | +--------------+ +MVDEDUP +------- + +Description +>>>>>>>>>>> + +Usage: mvdedup(array) removes duplicate values from a multivalue array while preserving the order of first occurrence. NULL elements are filtered out. Returns an array with duplicates removed, or null if the input is null. 
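+
+For illustration of the NULL handling described above (a sketch, not an executed doctest; it assumes ``null`` literals are accepted inside ``array()``), an input such as ``array(1, null, 2, null, 1)`` would be expected to produce ``[1,2]``::
+
+    source=people | eval result = mvdedup(array(1, null, 2, null, 1)) | fields result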
+ +Argument type: array: ARRAY + +Return type: ARRAY + +Example:: + + os> source=people | eval array = array(1, 2, 2, 3, 1, 4), result = mvdedup(array) | fields result | head 1 + fetched rows / total rows = 1/1 + +-----------+ + | result | + |-----------| + | [1,2,3,4] | + +-----------+ + + os> source=people | eval array = array('z', 'a', 'z', 'b', 'a', 'c'), result = mvdedup(array) | fields result | head 1 + fetched rows / total rows = 1/1 + +-----------+ + | result | + |-----------| + | [z,a,b,c] | + +-----------+ + + os> source=people | eval array = array(), result = mvdedup(array) | fields result | head 1 + fetched rows / total rows = 1/1 + +--------+ + | result | + |--------| + | [] | + +--------+ + MVINDEX ------- diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java index c829565768f..52a6e181e20 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java @@ -489,4 +489,82 @@ public void testMvindexRangeSingleElement() throws IOException { verifySchema(actual, schema("result", "array")); verifyDataRows(actual, rows(List.of(3))); } + + @Test + public void testMvdedupWithDuplicates() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(1, 2, 2, 3, 1, 4), result = mvdedup(arr) | head 1 |" + + " fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(1, 2, 3, 4))); + } + + @Test + public void testMvdedupWithNoDuplicates() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(1, 2, 3, 4), result = mvdedup(arr) | head 1 |" + + " fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(1, 2, 3, 4))); + } + + @Test + public void testMvdedupWithAllDuplicates() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(5, 5, 5, 5), result = mvdedup(arr) | head 1 |" + + " fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of(5))); + } + + @Test + public void testMvdedupWithEmptyArray() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array(), result = mvdedup(arr) | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of())); + } + + @Test + public void testMvdedupWithStrings() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('apple', 'banana', 'apple', 'cherry', 'banana')," + + " result = mvdedup(arr) | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of("apple", "banana", "cherry"))); + } + + @Test + public void testMvdedupPreservesOrder() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval arr = array('z', 'a', 'z', 'b', 'a', 'c'), result =" + + " mvdedup(arr) | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + // Should preserve first occurrence order: z, a, b, c + verifyDataRows(actual, 
rows(List.of("z", "a", "b", "c"))); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 370aaad56c0..86982893d0c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -443,6 +443,7 @@ ARRAY_LENGTH: 'ARRAY_LENGTH'; MVAPPEND: 'MVAPPEND'; MVJOIN: 'MVJOIN'; MVINDEX: 'MVINDEX'; +MVDEDUP: 'MVDEDUP'; FORALL: 'FORALL'; FILTER: 'FILTER'; TRANSFORM: 'TRANSFORM'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index caf636942fd..12a02f897d8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1096,6 +1096,7 @@ collectionFunctionName | MVAPPEND | MVJOIN | MVINDEX + | MVDEDUP | FORALL | EXISTS | FILTER diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java index bffa20175d5..176fb534f37 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -214,4 +214,80 @@ public void testMvindexRangeNegative() { + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testMvdedupWithDuplicates() { + String ppl = + "source=EMP | eval arr = array(1, 2, 2, 3, 1, 4), result = mvdedup(arr) | head 1 |" + + " fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array(1, 2, 2, 3, 1, 4)]," + + " result=[ARRAY_DISTINCT(array(1, 2, 2, 3, 1, 4))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "result=[1, 2, 3, 4]\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_DISTINCT(ARRAY(1, 2, 2, 3, 1, 4)) `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvdedupWithNoDuplicates() { + String ppl = + "source=EMP | eval arr = array(1, 2, 3, 4), result = mvdedup(arr) | head 1 | fields" + + " result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array(1, 2, 3, 4)]," + + " result=[ARRAY_DISTINCT(array(1, 2, 3, 4))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "result=[1, 2, 3, 4]\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_DISTINCT(ARRAY(1, 2, 3, 4)) `result`\n" + "FROM `scott`.`EMP`\n" + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvdedupPreservesOrder() { + String ppl = + "source=EMP | eval arr = array('z', 'a', 'z', 'b', 'a', 'c'), result = mvdedup(arr) |" + + " head 1 | fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], arr=[array('z', 'a', 'z', 'b', 'a', 'c')]," + + " 
result=[ARRAY_DISTINCT(array('z', 'a', 'z', 'b', 'a', 'c'))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedResult = "result=[z, a, b, c]\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT ARRAY_DISTINCT(ARRAY('z', 'a', 'z', 'b', 'a', 'c')) `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 549fca03195..0f59e98e74b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -829,6 +829,15 @@ public void testMvindex() { anonymize("source=t | eval result=mvindex(array(1, 2, 3, 4, 5), 1, 3) | fields result")); } + @Test + public void testMvdedup() { + // Test mvdedup with array containing duplicates + assertEquals( + "source=table | eval identifier=mvdedup(array(***,***,***,***,***,***)) | fields +" + + " identifier", + anonymize("source=t | eval result=mvdedup(array(1, 2, 2, 3, 1, 4)) | fields result")); + } + @Test public void testRexWithOffsetField() { when(settings.getSettingValue(Key.PPL_REX_MAX_MATCH_LIMIT)).thenReturn(10); From 2b8992a63b11074b242250f281734b1f32dd8e9d Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 25 Nov 2025 13:46:34 +0800 Subject: [PATCH 77/99] Update syntax: like(string, PATTERN[, case_sensitive]) (#4837) * Change like function to case-sensitive and add ilike for case-insensitive Signed-off-by: Lantao Jin * change plan in clickbench Signed-off-by: Lantao Jin * fix ut Signed-off-by: Lantao Jin * fix UT Signed-off-by: Lantao Jin * Update syntax: like(string, PATTERN[, case_sensitive]) Signed-off-by: Lantao Jin * fix IT Signed-off-by: Lantao Jin * Fix IT Signed-off-by: Lantao Jin * revert import merging Signed-off-by: Lantao Jin * support like(string, PATTERN, bool) in v2 Signed-off-by: Lantao Jin * dedup ut Signed-off-by: Lantao Jin --------- Signed-off-by: Lantao Jin --- .../sql/calcite/CalciteRexNodeVisitor.java | 9 +++ .../function/BuiltinFunctionName.java | 1 + .../expression/function/PPLFuncImpTable.java | 32 ++++++++-- .../predicate/BinaryPredicateOperators.java | 14 +++- .../opensearch/sql/utils/OperatorUtils.java | 20 +++++- .../BinaryPredicateOperatorTest.java | 4 +- docs/user/ppl/functions/string.rst | 52 ++++++++++++--- .../sql/calcite/remote/CalciteExplainIT.java | 20 ++++++ .../calcite/remote/CalciteLikeQueryIT.java | 64 +++++++++++++++++++ .../org/opensearch/sql/ppl/ExplainIT.java | 54 +++++++++++++++- .../org/opensearch/sql/ppl/LikeQueryIT.java | 25 ++++++++ .../test/resources/clickbench/queries/q21.ppl | 2 +- .../test/resources/clickbench/queries/q22.ppl | 2 +- .../test/resources/clickbench/queries/q23.ppl | 2 +- .../test/resources/clickbench/queries/q24.ppl | 2 +- .../calcite/clickbench/q21.yaml | 4 +- .../calcite/clickbench/q22.yaml | 4 +- .../calcite/clickbench/q23.yaml | 4 +- .../calcite/clickbench/q24.yaml | 6 +- .../explain_keyword_ilike_function.yaml | 8 +++ .../explain_keyword_like_function.yaml | 4 +- ...eyword_like_function_case_insensitive.yaml | 8 +++ .../calcite/explain_text_ilike_function.yaml | 8 +++ .../calcite/explain_text_like_function.yaml | 4 +- ...n_text_like_function_case_insensitive.yaml | 8 +++ .../explain_keyword_ilike_function.yaml | 10 +++ .../explain_keyword_like_function.yaml 
| 4 +- ...eyword_like_function_case_insensitive.yaml | 10 +++ .../explain_text_ilike_function.yaml | 10 +++ .../explain_text_like_function.yaml | 4 +- ...n_text_like_function_case_insensitive.yaml | 10 +++ .../ppl/explain_keyword_like_function.yaml | 9 ++- ...eyword_like_function_case_insensitive.yaml | 17 +++++ .../ppl/explain_text_like_function.yaml | 2 +- ...n_text_like_function_case_insensitive.yaml | 17 +++++ .../src/test/resources/tpch/queries/q13.ppl | 2 +- .../src/test/resources/tpch/queries/q14.ppl | 2 +- .../src/test/resources/tpch/queries/q16.ppl | 4 +- .../src/test/resources/tpch/queries/q2.ppl | 2 +- .../src/test/resources/tpch/queries/q20.ppl | 2 +- .../src/test/resources/tpch/queries/q9.ppl | 2 +- .../opensearch/request/PredicateAnalyzer.java | 10 +-- .../script/filter/lucene/LikeQuery.java | 20 ++++-- .../script/filter/lucene/LuceneQuery.java | 10 +++ .../request/PredicateAnalyzerTest.java | 27 +++++++- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 + .../sql/ppl/parser/AstExpressionBuilder.java | 12 +++- .../sql/ppl/utils/ArgumentFactory.java | 14 ++-- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 13 +--- .../sql/ppl/utils/UnresolvedPlanHelper.java | 13 ++++ .../calcite/CalcitePPLStringFunctionTest.java | 23 ++++++- .../ppl/parser/AstExpressionBuilderTest.java | 30 +++++++-- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 13 ++++ 54 files changed, 561 insertions(+), 95 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_ilike_function.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function_case_insensitive.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_ilike_function.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function_case_insensitive.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index ef6def9d4dd..ca0798c1e75 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -418,6 +418,15 @@ public RexNode visitFunction(Function node, CalcitePlanContext context) { } } + if ("LIKE".equalsIgnoreCase(node.getFuncName()) && arguments.size() == 2) { + RexNode defaultCaseSensitive = + CalcitePlanContext.isLegacyPreferred() + ? 
context.rexBuilder.makeLiteral(false) + : context.rexBuilder.makeLiteral(true); + arguments = new ArrayList<>(arguments); + arguments.add(defaultCaseSensitive); + } + RexNode resolvedNode = PPLFuncImpTable.INSTANCE.resolve( context.rexBuilder, node.getFuncName(), arguments.toArray(new RexNode[0])); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 87b0d9e69a7..21b938532ba 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -194,6 +194,7 @@ public enum BuiltinFunctionName { GREATER(FunctionName.of(">")), GTE(FunctionName.of(">=")), LIKE(FunctionName.of("like")), + ILIKE(FunctionName.of("ilike")), NOT_LIKE(FunctionName.of("not like")), /** Aggregation Function. */ diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 116f250d379..aa15c3228f1 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -79,6 +79,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.HOUR_OF_DAY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IF; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IFNULL; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ILIKE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_GROK; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_ITEM; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PARSE; @@ -328,6 +329,18 @@ default RexNode resolve(RexBuilder builder, RexNode... args) { } } + public interface FunctionImp3 extends FunctionImp { + RexNode resolve(RexBuilder builder, RexNode arg1, RexNode arg2, RexNode arg3); + + @Override + default RexNode resolve(RexBuilder builder, RexNode... args) { + if (args.length != 3) { + throw new IllegalArgumentException("This function requires exactly 3 arguments"); + } + return resolve(builder, args[0], args[1], args[2]); + } + } + /** The singleton instance. */ public static final PPLFuncImpTable INSTANCE; @@ -1217,17 +1230,22 @@ void populate() { arg))), PPLTypeChecker.family(SqlTypeFamily.ANY)); register( - LIKE, + ILIKE, (FunctionImp2) (builder, arg1, arg2) -> builder.makeCall( - SqlLibraryOperators.ILIKE, - arg1, - arg2, - // TODO: Figure out escaping solution. '\\' is used for JSON input but is not - // necessary for SQL function input - builder.makeLiteral("\\")), + SqlLibraryOperators.ILIKE, arg1, arg2, builder.makeLiteral("\\")), PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING)); + register( + LIKE, + (FunctionImp3) + (builder, arg1, arg2, arg3) -> + ((RexLiteral) arg3).getValueAs(Boolean.class) + ? 
builder.makeCall( + SqlStdOperatorTable.LIKE, arg1, arg2, builder.makeLiteral("\\")) + : builder.makeCall( + SqlLibraryOperators.ILIKE, arg1, arg2, builder.makeLiteral("\\")), + PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.BOOLEAN)); } } diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java index 6adc4fb2a36..3543fc22a1c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java @@ -387,7 +387,8 @@ private static DefaultFunctionResolver gte() { private static DefaultFunctionResolver like() { return define( BuiltinFunctionName.LIKE.getName(), - impl(nullMissingHandling(OperatorUtils::matches), BOOLEAN, STRING, STRING)); + impl(nullMissingHandling(OperatorUtils::matches2), BOOLEAN, STRING, STRING), + impl(nullMissingHandling(OperatorUtils::matches3), BOOLEAN, STRING, STRING, BOOLEAN)); } private static DefaultFunctionResolver regexp() { @@ -401,10 +402,17 @@ private static DefaultFunctionResolver notLike() { BuiltinFunctionName.NOT_LIKE.getName(), impl( nullMissingHandling( - (v1, v2) -> UnaryPredicateOperators.not(OperatorUtils.matches(v1, v2))), + (v1, v2) -> UnaryPredicateOperators.not(OperatorUtils.matches2(v1, v2))), BOOLEAN, STRING, - STRING)); + STRING), + impl( + nullMissingHandling( + (v1, v2, v3) -> UnaryPredicateOperators.not(OperatorUtils.matches3(v1, v2, v3))), + BOOLEAN, + STRING, + STRING, + BOOLEAN)); } private static ExprValue lookupTableFunction( diff --git a/core/src/main/java/org/opensearch/sql/utils/OperatorUtils.java b/core/src/main/java/org/opensearch/sql/utils/OperatorUtils.java index d9ae0b42580..2dd3c76ae9e 100644 --- a/core/src/main/java/org/opensearch/sql/utils/OperatorUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/OperatorUtils.java @@ -21,13 +21,31 @@ public class OperatorUtils { * @param pattern string pattern to match. * @return if text matches pattern returns true; else return false. */ - public static ExprBooleanValue matches(ExprValue text, ExprValue pattern) { + public static ExprBooleanValue matches2(ExprValue text, ExprValue pattern) { return ExprBooleanValue.of( Pattern.compile(patternToRegex(pattern.stringValue()), Pattern.CASE_INSENSITIVE) .matcher(text.stringValue()) .matches()); } + /** + * Wildcard pattern matcher util.
    + * Percent (%) character for wildcard,
+ * Underscore (_) character for a single character match. + * + * @param pattern string pattern to match. + * @param caseSensitive indicates the case sensitivity of the pattern. + * @return if text matches pattern returns true; else return false. + */ + public static ExprBooleanValue matches3( + ExprValue text, ExprValue pattern, ExprValue caseSensitive) { + Pattern p = + caseSensitive.booleanValue() + ? Pattern.compile(patternToRegex(pattern.stringValue())) + : Pattern.compile(patternToRegex(pattern.stringValue()), Pattern.CASE_INSENSITIVE); + return ExprBooleanValue.of(p.matcher(text.stringValue()).matches()); + } + /** * Checks if text matches regular expression pattern. * diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java index 3fc7f737f83..5ac9a114f0e 100644 --- a/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java @@ -22,7 +22,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.utils.ComparisonUtil.compare; -import static org.opensearch.sql.utils.OperatorUtils.matches; +import static org.opensearch.sql.utils.OperatorUtils.matches2; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -554,7 +554,7 @@ public void test_gte(ExprValue v1, ExprValue v2) { public void test_like(ExprValue v1, ExprValue v2) { FunctionExpression like = DSL.like(DSL.literal(v1), DSL.literal(v2)); assertEquals(BOOLEAN, like.type()); - assertEquals(matches(v1, v2), like.valueOf(valueEnv())); + assertEquals(matches2(v1, v2), like.valueOf(valueEnv())); assertEquals(String.format("like(%s, %s)", v1.toString(), v2.toString()), like.toString()); } diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index fac59d27eca..3e94d220094 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -80,33 +80,69 @@ Example:: | 10 | +----------------------+ - LIKE ---- Description >>>>>>>>>>> -Usage: like(string, PATTERN) return true if the string match the PATTERN, PATTERN is case insensitive. +Usage: like(string, PATTERN[, case_sensitive]) returns true if the string matches the PATTERN. ``case_sensitive`` is optional. When set to ``true``, PATTERN is **case-sensitive**. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. 
+ + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``case_sensitive`` defaults to ``false`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``case_sensitive`` defaults to ``true`` There are two wildcards often used in conjunction with the LIKE operator: * ``%`` - The percent sign represents zero, one, or multiple characters * ``_`` - The underscore represents a single character +Argument type: STRING, STRING [, BOOLEAN] + +Return type: BOOLEAN + Example:: - os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ELLO%') | fields `LIKE('hello world', '_ello%')` + os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ello%'), `LIKE('hello world', '_ELLo%', true)` = LIKE('hello world', '_ELLo%', true), `LIKE('hello world', '_ELLo%', false)` = LIKE('hello world', '_ELLo%', false) | fields `LIKE('hello world', '_ello%')`, `LIKE('hello world', '_ELLo%', true)`, `LIKE('hello world', '_ELLo%', false)` fetched rows / total rows = 1/1 - +-------------------------------+ - | LIKE('hello world', '_ello%') | - |-------------------------------| - | True | - +-------------------------------+ + +-------------------------------+-------------------------------------+--------------------------------------+ + | LIKE('hello world', '_ello%') | LIKE('hello world', '_ELLo%', true) | LIKE('hello world', '_ELLo%', false) | + |-------------------------------+-------------------------------------+--------------------------------------| + | True | False | True | + +-------------------------------+-------------------------------------+--------------------------------------+ Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields. +ILIKE +----- + +Description +>>>>>>>>>>> + +Usage: ilike(string, PATTERN) returns true if the string matches the PATTERN; the PATTERN is **case-insensitive**. + +There are two wildcards often used in conjunction with the ILIKE operator: + +* ``%`` - The percent sign represents zero, one, or multiple characters +* ``_`` - The underscore represents a single character + +Argument type: STRING, STRING + +Return type: BOOLEAN + +Example:: + + os> source=people | eval `ILIKE('hello world', '_ELLo%')` = ILIKE('hello world', '_ELLo%') | fields `ILIKE('hello world', '_ELLo%')` + fetched rows / total rows = 1/1 + +--------------------------------+ + | ILIKE('hello world', '_ELLo%') | + |--------------------------------| + | True | + +--------------------------------+ + + +Limitation: The pushdown of the ILIKE function to a DSL wildcard query is supported only for keyword fields. 
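+
+As a usage sketch (both forms resolve to the same case-insensitive match, per the function registrations in this patch), ``ilike(string, PATTERN)`` is expected to behave the same as ``like(string, PATTERN, false)``::
+
+    source=people | where ilike(firstname, 'a%')
+    source=people | where like(firstname, 'a%', false)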
+ LOCATE ------- diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c2dce34fc38..06993dab289 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -667,6 +667,26 @@ public void testStreamstatsResetExplain() throws IOException { assertYamlEqualsIgnoreId(expected, result); } + @Test + public void testKeywordILikeFunctionExplain() throws IOException { + // ilike is only supported in v3 + String expected = loadExpectedPlan("explain_keyword_ilike_function.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | where ilike(firstname, '%mbe%')")); + } + + @Test + public void testTextILikeFunctionExplain() throws IOException { + // ilike is only supported in v3 + String expected = loadExpectedPlan("explain_text_ilike_function.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | where ilike(address, '%Holmes%')")); + } + // Only for Calcite, as v2 gets unstable serialized string for function @Test public void testExplainOnAggregationWithSumEnhancement() throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java index 3a3b1bb0648..4debe504dad 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteLikeQueryIT.java @@ -5,8 +5,15 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; + import java.io.IOException; +import org.json.JSONObject; import org.junit.Test; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.ppl.LikeQueryIT; public class CalciteLikeQueryIT extends LikeQueryIT { @@ -22,4 +29,61 @@ public void test_convert_field_text_to_keyword() throws IOException { enabledOnlyWhenPushdownIsEnabled(); super.test_convert_field_text_to_keyword(); } + + @Test + public void test_ilike_is_case_insensitive() throws IOException { + String query = + "source=" + + TEST_INDEX_WILDCARD + + " | WHERE ILike(KeywordBody, 'test Wildcard%') | fields KeywordBody"; + JSONObject result = executeQuery(query); + verifyDataRows( + result, + rows("test wildcard"), + rows("test wildcard in the end of the text%"), + rows("test wildcard in % the middle of the text"), + rows("test wildcard %% beside each other"), + rows("test wildcard in the end of the text_"), + rows("test wildcard in _ the middle of the text"), + rows("test wildcard __ beside each other")); + } + + @Test + public void test_the_default_3rd_option() throws IOException { + // only works in v3 + String query = + "source=" + + TEST_INDEX_WILDCARD + + " | WHERE Like(KeywordBody, 'test Wildcard%') | fields KeywordBody"; + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "true", + () -> { + try { + JSONObject result = executeQuery(query); + verifyDataRows( + result, + rows("test wildcard"), + rows("test wildcard in the end of the text%"), 
rows("test wildcard in % the middle of the text"), + rows("test wildcard %% beside each other"), + rows("test wildcard in the end of the text_"), + rows("test wildcard in _ the middle of the text"), + rows("test wildcard __ beside each other")); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + try { + JSONObject result = executeQuery(query); + verifyNumOfRows(result, 0); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 2319cf703a4..9c34586e06d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -20,6 +20,7 @@ import org.junit.Ignore; import org.junit.jupiter.api.Test; import org.opensearch.client.ResponseException; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.TestUtils; public class ExplainIT extends PPLIntegTestCase { @@ -551,7 +552,31 @@ public void testKeywordLikeFunctionExplain() throws IOException { assertYamlEqualsIgnoreId( expected, explainQueryYaml( - "source=opensearch-sql_test_index_account | where like(firstname, '%mbe%')")); + "source=opensearch-sql_test_index_account | where like(firstname, '%mbe%', true)")); + if (isCalciteEnabled()) { + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + try { + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | where like(firstname, '%mbe%')")); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + } + + @Test + public void testKeywordLikeFunctionCaseInsensitiveExplain() throws IOException { + String expected = loadExpectedPlan("explain_keyword_like_function_case_insensitive.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | where like(firstname, '%mbe%', false)")); } @Test @@ -560,7 +585,32 @@ public void testTextLikeFunctionExplain() throws IOException { assertYamlEqualsIgnoreId( expected, explainQueryYaml( - "source=opensearch-sql_test_index_account | where like(address, '%Holmes%')")); + "source=opensearch-sql_test_index_account | where like(address, '%Holmes%', true)")); + if (isCalciteEnabled()) { + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + try { + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | where like(address," + + " '%Holmes%')")); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + } + + @Test + public void testTextLikeFunctionCaseInsensitiveExplain() throws IOException { + String expected = loadExpectedPlan("explain_text_like_function_case_insensitive.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | where like(address, '%Holmes%', false)")); } @Ignore("The serialized string is unstable because of function properties") diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java index 31ea92d54e4..bc98c312fb6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/LikeQueryIT.java @@ -8,6 +8,7 @@ import static 
org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; import java.io.IOException; import org.json.JSONObject; @@ -104,4 +105,28 @@ public void test_convert_field_text_to_keyword() throws IOException { String result = explainQueryToString(query); assertTrue(result.contains("TextKeywordBody.keyword")); } + + @Test + public void test_like_with_case_sensitive() throws IOException { + String query = + "source=" + + TEST_INDEX_WILDCARD + + " | WHERE Like(KeywordBody, 'test Wildcard%', false) | fields KeywordBody"; + JSONObject result = executeQuery(query); + verifyDataRows( + result, + rows("test wildcard"), + rows("test wildcard in the end of the text%"), + rows("test wildcard in % the middle of the text"), + rows("test wildcard %% beside each other"), + rows("test wildcard in the end of the text_"), + rows("test wildcard in _ the middle of the text"), + rows("test wildcard __ beside each other")); + query = + "source=" + + TEST_INDEX_WILDCARD + + " | WHERE Like(KeywordBody, 'test Wildcard%', true) | fields KeywordBody"; + result = executeQuery(query); + verifyNumOfRows(result, 0); + } } diff --git a/integ-test/src/test/resources/clickbench/queries/q21.ppl b/integ-test/src/test/resources/clickbench/queries/q21.ppl index 657b0e1fc30..16524f91aec 100644 --- a/integ-test/src/test/resources/clickbench/queries/q21.ppl +++ b/integ-test/src/test/resources/clickbench/queries/q21.ppl @@ -2,5 +2,5 @@ SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; */ source=hits -| where like(URL, '%google%') +| where like(URL, '%google%', true) | stats count() \ No newline at end of file diff --git a/integ-test/src/test/resources/clickbench/queries/q22.ppl b/integ-test/src/test/resources/clickbench/queries/q22.ppl index b4f51f40b21..7ab06c1c70f 100644 --- a/integ-test/src/test/resources/clickbench/queries/q22.ppl +++ b/integ-test/src/test/resources/clickbench/queries/q22.ppl @@ -4,7 +4,7 @@ FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; */ source=hits -| where like(URL, '%google%') and SearchPhrase != '' +| where like(URL, '%google%', true) and SearchPhrase != '' | stats bucket_nullable=false /* min(URL), */ count() as c by SearchPhrase | sort - c | head 10 diff --git a/integ-test/src/test/resources/clickbench/queries/q23.ppl b/integ-test/src/test/resources/clickbench/queries/q23.ppl index eb5ed5b0102..593caa33365 100644 --- a/integ-test/src/test/resources/clickbench/queries/q23.ppl +++ b/integ-test/src/test/resources/clickbench/queries/q23.ppl @@ -4,7 +4,7 @@ FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPh GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; */ source=hits -| where like(Title, '%Google%') and not like(URL, '%.google.%') and SearchPhrase != '' +| where like(Title, '%Google%', true) and not like(URL, '%.google.%', true) and SearchPhrase != '' | stats bucket_nullable=false /* min(URL), min(Title), */ count() as c, dc(UserID) by SearchPhrase | sort - c | head 10 diff --git a/integ-test/src/test/resources/clickbench/queries/q24.ppl b/integ-test/src/test/resources/clickbench/queries/q24.ppl index 1323546a687..18c09075117 100644 --- a/integ-test/src/test/resources/clickbench/queries/q24.ppl +++ b/integ-test/src/test/resources/clickbench/queries/q24.ppl @@ -2,6 +2,6 @@ SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY 
EventTime LIMIT 10; */ source=hits -| where like(URL, '%google%') +| where like(URL, '%google%', true) | sort EventTime | head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q21.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q21.yaml index 88274ba5655..db43556e0cb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q21.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q21.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalAggregate(group=[{}], count()=[COUNT()]) - LogicalFilter(condition=[ILIKE($26, '%google%', '\')]) + LogicalFilter(condition=[LIKE($26, '%google%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL], FILTER->ILIKE($0, '%google%', '\'), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"wildcard":{"URL":{"wildcard":"*google*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["URL"],"excludes":[]},"track_total_hits":2147483647}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL], FILTER->LIKE($0, '%google%', '\'), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},"_source":{"includes":["URL"],"excludes":[]},"track_total_hits":2147483647}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml index 1fec253178c..c40758c1b3d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml @@ -6,7 +6,7 @@ calcite: LogicalAggregate(group=[{0}], c=[COUNT()]) LogicalProject(SearchPhrase=[$63]) LogicalFilter(condition=[IS NOT NULL($63)]) - LogicalFilter(condition=[AND(ILIKE($26, '%google%', '\'), <>($63, ''))]) + LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase], FILTER->AND(ILIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","case_insensitive":true,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase], FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml index acbf78ee28d..d38438ad4e9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml @@ -6,7 +6,7 @@ calcite: LogicalAggregate(group=[{0}], c=[COUNT()], dc(UserID)=[COUNT(DISTINCT $1)]) LogicalProject(SearchPhrase=[$63], UserID=[$84]) LogicalFilter(condition=[IS NOT NULL($63)]) - LogicalFilter(condition=[AND(ILIKE($97, '%Google%', '\'), <>($63, ''), NOT(ILIKE($26, '%.google.%', '\')))]) + LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(ILIKE($3, '%Google%', '\'), <>($1, ''), NOT(ILIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","case_insensitive":true,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","case_insensitive":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml index 97c0970f8d6..545df273617 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q24.yaml @@ -3,12 +3,12 @@ calcite: LogicalSystemLimit(sort0=[$17], dir0=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], 
IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104]) LogicalSort(sort0=[$17], dir0=[ASC-nulls-first], fetch=[10]) - LogicalFilter(condition=[ILIKE($26, '%google%', '\')]) + LogicalFilter(condition=[LIKE($26, '%google%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLRegionID, HasGCLID, Income, Interests, Robotness, BrowserLanguage, CounterClass, BrowserCountry, OriginalURL, ClientTimeZone, RefererHash, TraficSourceID, HitColor, RefererRegionID, URLCategoryID, LocalEventTime, EventTime, UTMTerm, AdvEngineID, UserAgentMinor, UserAgentMajor, RemoteIP, Sex, JavaEnable, URLHash, URL, ParamOrderID, OpenstatSourceID, HTTPError, SilverlightVersion3, MobilePhoneModel, SilverlightVersion4, SilverlightVersion1, SilverlightVersion2, IsDownload, IsParameter, CLID, FlashMajor, FlashMinor, UTMMedium, WatchID, DontCountHits, CookieEnable, HID, SocialAction, WindowName, ConnectTiming, PageCharset, IsLink, IsArtifical, JavascriptEnable, ClientEventTime, DNSTiming, CodeVersion, ResponseEndTiming, FUniqID, WindowClientHeight, OpenstatServiceName, UTMContent, HistoryLength, IsOldCounter, MobilePhone, SearchPhrase, FlashMinor2, SearchEngineID, IsEvent, UTMSource, RegionID, OpenstatAdID, UTMCampaign, GoodEvent, IsRefresh, ParamCurrency, Params, ResolutionHeight, ClientIP, FromTag, ParamCurrencyID, ResponseStartTiming, ResolutionWidth, SendTiming, RefererCategoryID, OpenstatCampaignID, UserID, WithHash, UserAgent, ParamPrice, ResolutionDepth, IsMobile, Age, SocialSourceNetworkID, OpenerName, OS, IsNotBounce, Referer, NetMinor, Title, NetMajor, IPNetworkID, FetchTiming, SocialNetwork, SocialSourcePage, CounterID, WindowClientWidth], FILTER->ILIKE($26, '%google%', '\'), SORT->[{ + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLRegionID, HasGCLID, Income, Interests, Robotness, BrowserLanguage, CounterClass, BrowserCountry, OriginalURL, ClientTimeZone, RefererHash, TraficSourceID, HitColor, RefererRegionID, URLCategoryID, LocalEventTime, EventTime, UTMTerm, AdvEngineID, UserAgentMinor, UserAgentMajor, RemoteIP, Sex, JavaEnable, URLHash, URL, ParamOrderID, OpenstatSourceID, HTTPError, 
SilverlightVersion3, MobilePhoneModel, SilverlightVersion4, SilverlightVersion1, SilverlightVersion2, IsDownload, IsParameter, CLID, FlashMajor, FlashMinor, UTMMedium, WatchID, DontCountHits, CookieEnable, HID, SocialAction, WindowName, ConnectTiming, PageCharset, IsLink, IsArtifical, JavascriptEnable, ClientEventTime, DNSTiming, CodeVersion, ResponseEndTiming, FUniqID, WindowClientHeight, OpenstatServiceName, UTMContent, HistoryLength, IsOldCounter, MobilePhone, SearchPhrase, FlashMinor2, SearchEngineID, IsEvent, UTMSource, RegionID, OpenstatAdID, UTMCampaign, GoodEvent, IsRefresh, ParamCurrency, Params, ResolutionHeight, ClientIP, FromTag, ParamCurrencyID, ResponseStartTiming, ResolutionWidth, SendTiming, RefererCategoryID, OpenstatCampaignID, UserID, WithHash, UserAgent, ParamPrice, ResolutionDepth, IsMobile, Age, SocialSourceNetworkID, OpenerName, OS, IsNotBounce, Referer, NetMinor, Title, NetMajor, IPNetworkID, FetchTiming, SocialNetwork, SocialSourcePage, CounterID, WindowClientWidth], FILTER->LIKE($26, '%google%', '\'), SORT->[{ "EventTime" : { "order" : "asc", "missing" : "_first" } - }], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"wildcard":{"URL":{"wildcard":"*google*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["EventDate","URLRegionID","HasGCLID","Income","Interests","Robotness","BrowserLanguage","CounterClass","BrowserCountry","OriginalURL","ClientTimeZone","RefererHash","TraficSourceID","HitColor","RefererRegionID","URLCategoryID","LocalEventTime","EventTime","UTMTerm","AdvEngineID","UserAgentMinor","UserAgentMajor","RemoteIP","Sex","JavaEnable","URLHash","URL","ParamOrderID","OpenstatSourceID","HTTPError","SilverlightVersion3","MobilePhoneModel","SilverlightVersion4","SilverlightVersion1","SilverlightVersion2","IsDownload","IsParameter","CLID","FlashMajor","FlashMinor","UTMMedium","WatchID","DontCountHits","CookieEnable","HID","SocialAction","WindowName","ConnectTiming","PageCharset","IsLink","IsArtifical","JavascriptEnable","ClientEventTime","DNSTiming","CodeVersion","ResponseEndTiming","FUniqID","WindowClientHeight","OpenstatServiceName","UTMContent","HistoryLength","IsOldCounter","MobilePhone","SearchPhrase","FlashMinor2","SearchEngineID","IsEvent","UTMSource","RegionID","OpenstatAdID","UTMCampaign","GoodEvent","IsRefresh","ParamCurrency","Params","ResolutionHeight","ClientIP","FromTag","ParamCurrencyID","ResponseStartTiming","ResolutionWidth","SendTiming","RefererCategoryID","OpenstatCampaignID","UserID","WithHash","UserAgent","ParamPrice","ResolutionDepth","IsMobile","Age","SocialSourceNetworkID","OpenerName","OS","IsNotBounce","Referer","NetMinor","Title","NetMajor","IPNetworkID","FetchTiming","SocialNetwork","SocialSourcePage","CounterID","WindowClientWidth"],"excludes":[]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) + }], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},"_source":{"includes":["EventDate","URLRegionID","HasGCLID","Income","Interests","Robotness","BrowserLanguage","CounterClass","BrowserCountry","OriginalURL","ClientTimeZone","RefererHash","TraficSourceID","HitColor","RefererRegionID","URLCategoryID","LocalEventTime","EventTime","UTMTerm","AdvEngineID","UserAgentMinor","UserAgentMajor","RemoteIP","Sex","JavaEnable","URLHash","URL","ParamOrderID","OpenstatSourceID","HTTPError","SilverlightVersion3","MobilePhoneModel","SilverlightVersion4","SilverlightVersion1","SilverlightVersion2","IsDownload","IsParameter","CLID","FlashMajor","FlashMinor","UTMMedium","WatchID","DontCountHits","CookieEnable","HID","SocialAction","WindowName","ConnectTiming","PageCharset","IsLink","IsArtifical","JavascriptEnable","ClientEventTime","DNSTiming","CodeVersion","ResponseEndTiming","FUniqID","WindowClientHeight","OpenstatServiceName","UTMContent","HistoryLength","IsOldCounter","MobilePhone","SearchPhrase","FlashMinor2","SearchEngineID","IsEvent","UTMSource","RegionID","OpenstatAdID","UTMCampaign","GoodEvent","IsRefresh","ParamCurrency","Params","ResolutionHeight","ClientIP","FromTag","ParamCurrencyID","ResponseStartTiming","ResolutionWidth","SendTiming","RefererCategoryID","OpenstatCampaignID","UserID","WithHash","UserAgent","ParamPrice","ResolutionDepth","IsMobile","Age","SocialSourceNetworkID","OpenerName","OS","IsNotBounce","Referer","NetMinor","Title","NetMajor","IPNetworkID","FetchTiming","SocialNetwork","SocialSourcePage","CounterID","WindowClientWidth"],"excludes":[]},"sort":[{"EventTime":{"order":"asc","missing":"_first"}}]}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml new file mode 100644 index 00000000000..3a891dc6bc4 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_ilike_function.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml index 3a891dc6bc4..f76e6520ae5 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) + LogicalFilter(condition=[LIKE($1, '%mbe%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->LIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml new file mode 100644 index 00000000000..3a891dc6bc4 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_keyword_like_function_case_insensitive.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], FILTER->ILIKE($1, '%mbe%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"wildcard":{"firstname.keyword":{"wildcard":"*mbe*","case_insensitive":true,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml new file mode 100644 index 00000000000..e02a56adbee --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_ilike_function.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogOAogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml index 9eb6e726fbf..5fee25183c9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function.yaml @@ -2,7 +2,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) + LogicalFilter(condition=[LIKE($2, '%Holmes%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogOAogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->LIKE($2, '%Holmes%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCAHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMSUtFIiwKICAgICJraW5kIjogIkxJS0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiA4CiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml new file mode 100644 index 00000000000..e02a56adbee --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_text_like_function_case_insensitive.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->ILIKE($2, '%Holmes%', '\'), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCAXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJTElLRSIsCiAgICAia2luZCI6ICJMSUtFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogOAogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2,2],"DIGESTS":["address","%Holmes%","\\"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_ilike_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_ilike_function.yaml new file mode 100644 index 00000000000..f8b576cb814 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_ilike_function.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%mbe%'], expr#18=['\'], expr#19=[ILIKE($t1, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.yaml index f8b576cb814..2d164b50d29 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function.yaml @@ -2,9 +2,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) + LogicalFilter(condition=[LIKE($1, '%mbe%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..16=[{inputs}], 
expr#17=['%mbe%'], expr#18=['\'], expr#19=[ILIKE($t1, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%mbe%'], expr#18=['\'], expr#19=[LIKE($t1, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function_case_insensitive.yaml new file mode 100644 index 00000000000..f8b576cb814 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_keyword_like_function_case_insensitive.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($1, '%mbe%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%mbe%'], expr#18=['\'], expr#19=[ILIKE($t1, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_ilike_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_ilike_function.yaml new file mode 100644 index 00000000000..41638cd1b16 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_ilike_function.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%Holmes%'], expr#18=['\'], expr#19=[ILIKE($t2, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.yaml index 41638cd1b16..6be02086bb0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function.yaml @@ -2,9 +2,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) + LogicalFilter(condition=[LIKE($2, '%Holmes%', '\')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | 
EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%Holmes%'], expr#18=['\'], expr#19=[ILIKE($t2, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%Holmes%'], expr#18=['\'], expr#19=[LIKE($t2, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function_case_insensitive.yaml new file mode 100644 index 00000000000..41638cd1b16 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_text_like_function_case_insensitive.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[ILIKE($2, '%Holmes%', '\')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=['%Holmes%'], expr#18=['\'], expr#19=[ILIKE($t2, $t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml index 5b950289224..766e6eb5f22 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function.yaml @@ -8,10 +8,9 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\"\ - :{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\"\ - :true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"\ - firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"\ - state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, needClean=true,\ - \ searchDone=false, pitId=*,\ + :{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"boost\"\ + :1.0}}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\"\ + ,\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\"\ + ,\"lastname\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml new file mode 100644 index 00000000000..5b950289224 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml @@ -0,0 +1,17 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, firstname, address, balance, gender, city, employer,\ + \ state, age, email, lastname]" + children: + - name: 
OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\"\ + :{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\"\ + :true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"\ + firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"\ + state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, needClean=true,\ + \ searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml index c8af0d94dbe..2433ee66003 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml @@ -6,7 +6,7 @@ root: children: - name: FilterOperator description: - conditions: "like(address, \"%Holmes%\")" + conditions: "like(address, \"%Holmes%\", true)" children: - name: OpenSearchIndexScan description: diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml new file mode 100644 index 00000000000..3187fd69900 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml @@ -0,0 +1,17 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, firstname, address, balance, gender, city, employer,\ + \ state, age, email, lastname]" + children: + - name: FilterOperator + description: + conditions: "like(address, \"%Holmes%\", false)" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true,\ + \ searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/tpch/queries/q13.ppl b/integ-test/src/test/resources/tpch/queries/q13.ppl index 2a06a9a831b..3c65ea1245d 100644 --- a/integ-test/src/test/resources/tpch/queries/q13.ppl +++ b/integ-test/src/test/resources/tpch/queries/q13.ppl @@ -23,7 +23,7 @@ order by source = [ source = customer - | left outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') + | left outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%', true) orders | stats count(o_orderkey) as c_count by c_custkey ] as c_orders diff --git a/integ-test/src/test/resources/tpch/queries/q14.ppl b/integ-test/src/test/resources/tpch/queries/q14.ppl index 286041e3f28..8951e2c93a1 100644 --- a/integ-test/src/test/resources/tpch/queries/q14.ppl +++ b/integ-test/src/test/resources/tpch/queries/q14.ppl @@ -19,7 +19,7 @@ source = lineitem AND l_shipdate >= date('1995-09-01') AND l_shipdate < date_add(date('1995-09-01'), interval 1 month) part -| stats sum(case(like(p_type, 'PROMO%'), l_extendedprice * (1 - l_discount) else 0)) as sum1, +| stats sum(case(like(p_type, 'PROMO%', true), l_extendedprice * (1 - l_discount) else 0)) as sum1, sum(l_extendedprice * (1 - l_discount)) as sum2 | eval promo_revenue = 100.00 * sum1 / sum2 | 
fields promo_revenue \ No newline at end of file diff --git a/integ-test/src/test/resources/tpch/queries/q16.ppl b/integ-test/src/test/resources/tpch/queries/q16.ppl index 387660ba9e9..86c34d0ab42 100644 --- a/integ-test/src/test/resources/tpch/queries/q16.ppl +++ b/integ-test/src/test/resources/tpch/queries/q16.ppl @@ -34,11 +34,11 @@ order by source = partsupp | join ON p_partkey = ps_partkey part | where p_brand != 'Brand#45' - and not like(p_type, 'MEDIUM POLISHED%') + and not like(p_type, 'MEDIUM POLISHED%', true) and p_size in (49, 14, 23, 45, 19, 3, 36, 9) and ps_suppkey not in [ source = supplier - | where like(s_comment, '%Customer%Complaints%') + | where like(s_comment, '%Customer%Complaints%', true) | fields s_suppkey ] | stats distinct_count(ps_suppkey) as supplier_cnt by p_brand, p_type, p_size diff --git a/integ-test/src/test/resources/tpch/queries/q2.ppl b/integ-test/src/test/resources/tpch/queries/q2.ppl index 59d24b161ba..c7c0c94bd23 100644 --- a/integ-test/src/test/resources/tpch/queries/q2.ppl +++ b/integ-test/src/test/resources/tpch/queries/q2.ppl @@ -50,7 +50,7 @@ source = part | join ON s_suppkey = ps_suppkey supplier | join ON s_nationkey = n_nationkey nation | join ON n_regionkey = r_regionkey region -| where p_size = 15 AND like(p_type, '%BRASS') AND r_name = 'EUROPE' AND ps_supplycost = [ +| where p_size = 15 AND like(p_type, '%BRASS', true) AND r_name = 'EUROPE' AND ps_supplycost = [ source = partsupp | join ON s_suppkey = ps_suppkey supplier | join ON s_nationkey = n_nationkey nation diff --git a/integ-test/src/test/resources/tpch/queries/q20.ppl b/integ-test/src/test/resources/tpch/queries/q20.ppl index 116a172370b..a7a824982ab 100644 --- a/integ-test/src/test/resources/tpch/queries/q20.ppl +++ b/integ-test/src/test/resources/tpch/queries/q20.ppl @@ -45,7 +45,7 @@ source = supplier source = partsupp | where ps_partkey in [ source = part - | where like(p_name, 'forest%') + | where like(p_name, 'forest%', true) | fields p_partkey ] and ps_availqty > [ diff --git a/integ-test/src/test/resources/tpch/queries/q9.ppl b/integ-test/src/test/resources/tpch/queries/q9.ppl index 84b0008f1ed..58980a8ea90 100644 --- a/integ-test/src/test/resources/tpch/queries/q9.ppl +++ b/integ-test/src/test/resources/tpch/queries/q9.ppl @@ -40,7 +40,7 @@ source = [ | join ON ps_partkey = l_partkey and ps_suppkey = l_suppkey partsupp | join ON o_orderkey = l_orderkey orders | join ON s_nationkey = n_nationkey nation - | where like(p_name, '%green%') + | where like(p_name, '%green%', true) | eval nation = n_name | eval o_year = year(o_orderdate) | eval amount = l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index c74356ce977..43d06c5e6b1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -70,6 +70,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.SqlSyntax; +import org.apache.calcite.sql.fun.SqlLikeOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -685,7 +686,8 @@ private QueryExpression like(RexCall call) { final Expression a = call.getOperands().get(0).accept(this); 
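// Sketch of the case handling this hunk introduces (assuming the standard
// Calcite LIKE operators are the ones bound to the PPL functions): the
// sensitivity flag is carried by the operator itself, not by an extra
// operand, so
//   ((SqlLikeOperator) call.getOperator()).isCaseSensitive()
// yields true for SqlStdOperatorTable.LIKE (PPL like) and false for
// SqlLibraryOperators.ILIKE (PPL ilike); the pushdown below then emits
// wildcardQuery(field, pattern).caseInsensitive(!caseSensitive).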
final Expression b = call.getOperands().get(1).accept(this); final SwapResult pair = swap(a, b); - return QueryExpression.create(pair.getKey()).like(pair.getValue()); + final boolean caseSensitive = ((SqlLikeOperator) call.getOperator()).isCaseSensitive(); + return QueryExpression.create(pair.getKey()).like(pair.getValue(), caseSensitive); } private static QueryExpression constructQueryExpressionForSearch( @@ -961,7 +963,7 @@ QueryExpression between(Range literal, boolean isTimeStamp) { throw new PredicateAnalyzerException("between cannot be applied to " + this.getClass()); } - QueryExpression like(LiteralExpression literal) { + QueryExpression like(LiteralExpression literal, boolean caseSensitive) { throw new PredicateAnalyzerException( "SqlOperatorImpl ['like'] " + "cannot be applied to " + this.getClass()); } @@ -1244,7 +1246,7 @@ public QueryExpression notExists() { * matching one by one, which is not same behavior with regular like function without pushdown. */ @Override - public QueryExpression like(LiteralExpression literal) { + public QueryExpression like(LiteralExpression literal, boolean caseSensitive) { String fieldName = getFieldReference(); String keywordField = OpenSearchTextType.toKeywordSubField(fieldName, this.rel.getExprType()); boolean isKeywordField = keywordField != null; @@ -1252,7 +1254,7 @@ public QueryExpression like(LiteralExpression literal) { builder = wildcardQuery( keywordField, StringUtils.convertSqlWildcardToLuceneSafe(literal.stringValue())) - .caseInsensitive(true); + .caseInsensitive(!caseSensitive); return this; } throw new UnsupportedOperationException("Like query is not supported for text field"); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java index eb4aaa8600b..f47486bec67 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LikeQuery.java @@ -21,7 +21,14 @@ public class LikeQuery extends LuceneQuery { @Override public QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) { String field = OpenSearchTextType.convertTextToKeyword(fieldName, fieldType); - return createBuilder(field, literal.stringValue()); + return createBuilder(field, literal.stringValue(), false); + } + + @Override + public QueryBuilder doBuild( + String fieldName, ExprType fieldType, ExprValue literal1, ExprValue literal2) { + String field = OpenSearchTextType.convertTextToKeyword(fieldName, fieldType); + return createBuilder(field, literal1.stringValue(), literal2.booleanValue()); } /** @@ -29,9 +36,9 @@ public QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue lite * relevance function which wildcard_query is. 
The arguments in LIKE are of type * ReferenceExpression while wildcard_query are of type NamedArgumentExpression */ - protected WildcardQueryBuilder createBuilder(String field, String query) { + protected WildcardQueryBuilder createBuilder(String field, String query, Boolean caseSensitive) { String matchText = StringUtils.convertSqlWildcardToLuceneSafe(query); - return QueryBuilders.wildcardQuery(field, matchText).caseInsensitive(true); + return QueryBuilders.wildcardQuery(field, matchText).caseInsensitive(!caseSensitive); } /** @@ -45,10 +52,15 @@ protected WildcardQueryBuilder createBuilder(String field, String query) { */ @Override public boolean canSupport(FunctionExpression func) { - if (func.getArguments().size() == 2 + if ((func.getArguments().size() == 2 || func.getArguments().size() == 3) && (func.getArguments().get(0) instanceof ReferenceExpression) && (func.getArguments().get(1) instanceof LiteralExpression || literalExpressionWrappedByCast(func))) { + if (func.getArguments().size() == 3 + && !(func.getArguments().get(2) instanceof LiteralExpression)) { + // The third argument of like function must be boolean literal + return false; + } ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0); // Only support keyword type field if (OpenSearchTextType.toKeywordSubField(ref.getRawPath(), ref.getType()) != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java index cdaf4af3440..426af9a4b11 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java @@ -109,6 +109,10 @@ public QueryBuilder build(FunctionExpression func) { Expression expr = func.getArguments().get(1); ExprValue literalValue = expr instanceof LiteralExpression ? expr.valueOf() : cast((FunctionExpression) expr, ref); + if (func.getArguments().size() == 3) { + return doBuild( + ref.getRawPath(), ref.type(), literalValue, func.getArguments().get(2).valueOf()); + } return doBuild(ref.getRawPath(), ref.type(), literalValue); } @@ -286,6 +290,12 @@ protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue l "Subclass doesn't implement this and build method either"); } + protected QueryBuilder doBuild( + String fieldName, ExprType fieldType, ExprValue literal1, ExprValue literal2) { + throw new UnsupportedOperationException( + "Subclass doesn't implement this and build method either"); + } + /** * Converts a literal value to a formatted date or time value based on the specified field type. 
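For illustration, a minimal sketch of how the two doBuild overloads above differ (hypothetical field name and pattern; assumes LikeQuery's no-argument constructor):

    import org.opensearch.index.query.QueryBuilder;
    import org.opensearch.sql.data.model.ExprValue;
    import org.opensearch.sql.data.model.ExprValueUtils;
    import org.opensearch.sql.data.type.ExprCoreType;
    import org.opensearch.sql.opensearch.storage.script.filter.lucene.LikeQuery;

    public class LikeQuerySketch {
      public static void main(String[] args) {
        LikeQuery like = new LikeQuery();
        ExprValue pattern = ExprValueUtils.stringValue("%mbe%");

        // Two-argument form: keeps the pre-change behavior, i.e. the generated
        // wildcard query stays case-insensitive (caseInsensitive(true)).
        QueryBuilder insensitive = like.doBuild("firstname", ExprCoreType.STRING, pattern);

        // Three-argument form: the boolean literal is inverted into the wildcard
        // query, so a `true` (case-sensitive) literal yields caseInsensitive(false).
        QueryBuilder sensitive =
            like.doBuild("firstname", ExprCoreType.STRING, pattern, ExprValueUtils.booleanValue(true));

        System.out.println(insensitive);
        System.out.println(sensitive);
      }
    }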
* diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 9486f5fe0b8..0c1aef9ddfa 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -600,11 +600,33 @@ void multiMatchRelevanceQueryFunction_generatesMultiMatchQuery() @Test void likeFunction_keywordField_generatesWildcardQuery() throws ExpressionNotAnalyzableException { - List arguments = Arrays.asList(field2, builder.makeLiteral("%Hi%")); + List arguments = + Arrays.asList(field2, builder.makeLiteral("%Hi%"), builder.makeLiteral(true)); RexNode call = PPLFuncImpTable.INSTANCE.resolve(builder, "like", arguments.toArray(new RexNode[0])); QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); assertInstanceOf(WildcardQueryBuilder.class, result); + assertEquals( + """ + { + "wildcard" : { + "b.keyword" : { + "wildcard" : "*Hi*", + "boost" : 1.0 + } + } + }\ + """, + result.toString()); + } + + @Test + void ilikeFunction_keywordField_generatesWildcardQuery() throws ExpressionNotAnalyzableException { + List arguments = Arrays.asList(field2, builder.makeLiteral("%Hi%")); + RexNode call = + PPLFuncImpTable.INSTANCE.resolve(builder, "ilike", arguments.toArray(new RexNode[0])); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + assertInstanceOf(WildcardQueryBuilder.class, result); assertEquals( """ { @@ -623,7 +645,8 @@ void likeFunction_keywordField_generatesWildcardQuery() throws ExpressionNotAnal @Test void likeFunction_textField_scriptPushDown() throws ExpressionNotAnalyzableException { RexInputRef field3 = builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 2); - List arguments = Arrays.asList(field3, builder.makeLiteral("%Hi%")); + List arguments = + Arrays.asList(field3, builder.makeLiteral("%Hi%"), builder.makeLiteral(true)); RexNode call = PPLFuncImpTable.INSTANCE.resolve(builder, "like", arguments.toArray(new RexNode[0])); diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 86982893d0c..8abff8b4032 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -429,6 +429,7 @@ CAST: 'CAST'; // BOOL FUNCTIONS LIKE: 'LIKE'; +ILIKE: 'ILIKE'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; CIDRMATCH: 'CIDRMATCH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 12a02f897d8..378be713160 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1267,6 +1267,7 @@ timestampFunctionName // condition function return boolean value conditionFunctionName : LIKE + | ILIKE | ISNULL | ISNOTNULL | CIDRMATCH @@ -1329,6 +1330,7 @@ positionFunctionName | NOT_GREATER | REGEXP | LIKE + | ILIKE ; singleFieldRelevanceFunctionName diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 632f822d2fb..1a60ca5f2a3 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -72,6 +72,7 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.WcFieldExpressionContext; import 
org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor; import org.opensearch.sql.ppl.utils.ArgumentFactory; +import org.opensearch.sql.ppl.utils.UnresolvedPlanHelper; import org.opensearch.sql.utils.DateTimeUtils; /** Class of building AST Expression nodes. */ @@ -160,9 +161,16 @@ public UnresolvedExpression visitCompareExpr(CompareExprContext ctx) { String operator = ctx.comparisonOperator().getText(); if ("==".equals(operator)) { operator = EQUAL.getName().getFunctionName(); - } else if (LIKE.getName().getFunctionName().equalsIgnoreCase(operator)) { - operator = LIKE.getName().getFunctionName(); + } else if (LIKE.getName().getFunctionName().equalsIgnoreCase(operator) + && UnresolvedPlanHelper.isCalciteEnabled(astBuilder.getSettings())) { + operator = + UnresolvedPlanHelper.legacyPreferred(astBuilder.getSettings()) + ? ILIKE.getName().getFunctionName() + : LIKE.getName().getFunctionName(); + } else if (ILIKE.getName().getFunctionName().equalsIgnoreCase(operator)) { + operator = ILIKE.getName().getFunctionName(); } + return new Compare(operator, visit(ctx.left), visit(ctx.right)); } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 9fab9ba9a0f..41b1d7c2490 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -78,7 +78,9 @@ public static List getArgumentList( getArgumentValue(ctx1.bucketNullableArg(0).bucket_nullable)) : new Argument( Argument.BUCKET_NULLABLE, - legacyPreferred(settings) ? Literal.TRUE : Literal.FALSE))); + UnresolvedPlanHelper.legacyPreferred(settings) + ? Literal.TRUE + : Literal.FALSE))); if (ctx2 != null) { list.add(new Argument("dedupsplit", getArgumentValue(ctx2.dedupsplit))); } else { @@ -87,12 +89,6 @@ public static List getArgumentList( return list; } - private static boolean legacyPreferred(Settings settings) { - return settings == null - || settings.getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED) == null - || Boolean.TRUE.equals(settings.getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED)); - } - /** * Get list of {@link Argument}. * @@ -125,7 +121,7 @@ public static List getArgumentList(EventstatsCommandContext ctx, Setti Argument.BUCKET_NULLABLE, getArgumentValue(ctx.bucketNullableArg().bucket_nullable)) : new Argument( Argument.BUCKET_NULLABLE, - legacyPreferred(settings) ? Literal.TRUE : Literal.FALSE)); + UnresolvedPlanHelper.legacyPreferred(settings) ? Literal.TRUE : Literal.FALSE)); } /** @@ -278,7 +274,7 @@ public static List getArgumentList( RareTopN.Option.useNull.name(), opt.isPresent() ? getArgumentValue(opt.get().useNull) - : legacyPreferred(settings) ? Literal.TRUE : Literal.FALSE)); + : UnresolvedPlanHelper.legacyPreferred(settings) ? 
Literal.TRUE : Literal.FALSE)); return list; } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index e277dfacbc1..271688776e6 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -19,6 +19,7 @@ import java.util.Locale; import java.util.Objects; import java.util.stream.Collectors; +import lombok.Getter; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.AbstractNodeVisitor; @@ -114,7 +115,7 @@ public class PPLQueryDataAnonymizer extends AbstractNodeVisitor public static final String MASK_TABLE = "table"; private final AnonymizerExpressionAnalyzer expressionAnalyzer; - private final Settings settings; + @Getter private final Settings settings; public PPLQueryDataAnonymizer(Settings settings) { this.expressionAnalyzer = new AnonymizerExpressionAnalyzer(this); @@ -396,7 +397,7 @@ public String visitRareTopN(RareTopN node, String context) { String fields = visitFieldList(node.getFields()); String group = visitExpressionList(node.getGroupExprList()); String options = - isCalciteEnabled(settings) + UnresolvedPlanHelper.isCalciteEnabled(settings) ? StringUtils.format( "countfield='%s' showcount=%s usenull=%s ", countField, showCount, useNull) : ""; @@ -797,14 +798,6 @@ private String groupBy(String groupBy) { return Strings.isNullOrEmpty(groupBy) ? "" : StringUtils.format("by %s", groupBy); } - private boolean isCalciteEnabled(Settings settings) { - if (settings != null) { - return settings.getSettingValue(Settings.Key.CALCITE_ENGINE_ENABLED); - } else { - return false; - } - } - /** Expression Anonymizer. */ private static class AnonymizerExpressionAnalyzer extends AbstractNodeVisitor { private final PPLQueryDataAnonymizer queryAnonymizer; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/UnresolvedPlanHelper.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/UnresolvedPlanHelper.java index a502f2d769d..a67507be315 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/UnresolvedPlanHelper.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/UnresolvedPlanHelper.java @@ -10,6 +10,7 @@ import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.UnresolvedPlan; +import org.opensearch.sql.common.setting.Settings; /** The helper to add select to {@link UnresolvedPlan} if needed. 
*/ @UtilityClass @@ -23,4 +24,16 @@ public UnresolvedPlan addSelectAll(UnresolvedPlan plan) { return new Project(ImmutableList.of(AllFields.of())).attach(plan); } } + + public static boolean legacyPreferred(Settings settings) { + return settings == null + || settings.getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED) == null + || Boolean.TRUE.equals(settings.getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED)); + } + + public static boolean isCalciteEnabled(Settings settings) { + return settings == null + || settings.getSettingValue(Settings.Key.CALCITE_ENGINE_ENABLED) == null + || Boolean.TRUE.equals(settings.getSettingValue(Settings.Key.CALCITE_ENGINE_ENABLED)); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java index 42edc924294..43912b90572 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStringFunctionTest.java @@ -281,7 +281,28 @@ public void testToStringDuration() { @Test public void testLike() { - String ppl = "source=EMP | where like(JOB, 'SALE%') | stats count() as cnt"; + String ppl = "source=EMP | where like(JOB, 'SALE%', true) | stats count() as cnt"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalAggregate(group=[{}], cnt=[COUNT()])\n" + + " LogicalFilter(condition=[LIKE($2, 'SALE%', '\\')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "cnt=4\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "" + + "SELECT COUNT(*) `cnt`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `JOB` LIKE 'SALE%' ESCAPE '\\'"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testILike() { + String ppl = "source=EMP | where ilike(JOB, 'SALE%') | stats count() as cnt"; RelNode root = getRelNode(ppl); String expectedLogical = "" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index c83b7dfecb6..2176e51acbf 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -176,17 +176,35 @@ public void testLogicalLikeExpr() { filter(relation("t"), function("like", field("a"), stringLiteral("_a%b%c_d_")))); } + @Test + public void testLogicalLikeExprCaseSensitive() { + assertEqual( + "source=t | where like(a, '_a%b%c_d_', true)", + filter( + relation("t"), + function("like", field("a"), stringLiteral("_a%b%c_d_"), booleanLiteral(true)))); + } + + @Test + public void testLogicalLikeExprCaseInSensitive() { + assertEqual( + "source=t | where like(a, '_a%b%c_d_', false)", + filter( + relation("t"), + function("like", field("a"), stringLiteral("_a%b%c_d_"), booleanLiteral(false)))); + } + @Test public void testLikeOperatorExpr() { // Test LIKE operator syntax assertEqual( "source=t | where a LIKE '_a%b%c_d_'", - filter(relation("t"), compare("like", field("a"), stringLiteral("_a%b%c_d_")))); + filter(relation("t"), compare("ilike", field("a"), stringLiteral("_a%b%c_d_")))); // Test with fields on both sides assertEqual( "source=t | where a LIKE b", - filter(relation("t"), compare("like", field("a"), field("b")))); + filter(relation("t"), compare("ilike", field("a"), field("b")))); 
} @Test @@ -194,19 +212,19 @@ public void testLikeOperatorCaseInsensitive() { // Test LIKE operator with different cases - all should map to lowercase "like" assertEqual( "source=t | where a LIKE 'pattern'", - filter(relation("t"), compare("like", field("a"), stringLiteral("pattern")))); + filter(relation("t"), compare("ilike", field("a"), stringLiteral("pattern")))); assertEqual( "source=t | where a like 'pattern'", - filter(relation("t"), compare("like", field("a"), stringLiteral("pattern")))); + filter(relation("t"), compare("ilike", field("a"), stringLiteral("pattern")))); assertEqual( "source=t | where a Like 'pattern'", - filter(relation("t"), compare("like", field("a"), stringLiteral("pattern")))); + filter(relation("t"), compare("ilike", field("a"), stringLiteral("pattern")))); assertEqual( "source=t | where a LiKe 'pattern'", - filter(relation("t"), compare("like", field("a"), stringLiteral("pattern")))); + filter(relation("t"), compare("ilike", field("a"), stringLiteral("pattern")))); } @Test diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 0f59e98e74b..45f7611db17 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -53,6 +53,19 @@ public void testWhereCommand() { assertEquals("source=table | where identifier = ***", anonymize("search source=t | where a=1")); } + @Test + public void testLikeFunction() { + assertEquals( + "source=table | where like(identifier,***)", + anonymize("search source=t | where like(a, '%llo%')")); + assertEquals( + "source=table | where like(identifier,***,***)", + anonymize("search source=t | where like(a, '%llo%', true)")); + assertEquals( + "source=table | where like(identifier,***,***)", + anonymize("search source=t | where like(a, '%llo%', false)")); + } + // Fields and Table Command Tests @Test public void testFieldsCommandWithoutArguments() { From 9ecd4a928269475cb625eb73b4049b45a2790e48 Mon Sep 17 00:00:00 2001 From: Xinyu Hao <75524174+ishaoxy@users.noreply.github.com> Date: Wed, 26 Nov 2025 13:38:41 +0800 Subject: [PATCH 78/99] Add `bucket_nullable` argument for `Streamstats` command (#4831) * add bucket_nullable for streamstats Signed-off-by: Xinyu Hao * little change Signed-off-by: Xinyu Hao * fix IT, UT and doc Signed-off-by: Xinyu Hao * fix Signed-off-by: Xinyu Hao * change bucketNullable getOrDefault to get Signed-off-by: Xinyu Hao * fix error Signed-off-by: Xinyu Hao --------- Signed-off-by: Xinyu Hao --- .../opensearch/sql/ast/tree/StreamWindow.java | 29 +- .../sql/calcite/CalciteRelNodeVisitor.java | 55 ++-- docs/user/ppl/cmd/streamstats.rst | 39 ++- .../sql/calcite/remote/CalciteExplainIT.java | 30 +++ .../remote/CalciteStreamstatsCommandIT.java | 252 +++++++++++++++++- .../explain_streamstats_distinct_count.yaml | 9 +- .../explain_streamstats_earliest_latest.yaml | 9 +- ...reamstats_earliest_latest_custom_time.yaml | 9 +- .../calcite/explain_streamstats_global.yaml | 29 +- ...xplain_streamstats_global_null_bucket.yaml | 29 ++ .../explain_streamstats_null_bucket.yaml | 16 ++ .../calcite/explain_streamstats_reset.yaml | 41 +-- ...explain_streamstats_reset_null_bucket.yaml | 38 +++ .../explain_streamstats_distinct_count.yaml | 11 +- .../explain_streamstats_earliest_latest.yaml | 11 +- ...reamstats_earliest_latest_custom_time.yaml | 11 +- .../explain_streamstats_global.yaml | 29 +- 
...xplain_streamstats_global_null_bucket.yaml | 30 +++ .../explain_streamstats_null_bucket.yaml | 16 ++ .../explain_streamstats_reset.yaml | 41 +-- ...explain_streamstats_reset_null_bucket.yaml | 38 +++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 9 +- .../sql/ppl/utils/ArgumentFactory.java | 12 +- .../calcite/CalcitePPLStreamstatsTest.java | 48 +++- 25 files changed, 672 insertions(+), 171 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/StreamWindow.java b/core/src/main/java/org/opensearch/sql/ast/tree/StreamWindow.java index ed7bcf10289..c404ed26a38 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/StreamWindow.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/StreamWindow.java @@ -9,6 +9,7 @@ import java.util.List; import lombok.EqualsAndHashCode; import lombok.Getter; +import lombok.RequiredArgsConstructor; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -16,6 +17,7 @@ @Getter @ToString @EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor public class StreamWindow extends UnresolvedPlan { private final List windowFunctionList; @@ -23,36 +25,11 @@ public class StreamWindow extends UnresolvedPlan { private final boolean current; private final int window; private final boolean global; + private final boolean bucketNullable; private final UnresolvedExpression resetBefore; private final UnresolvedExpression resetAfter; @ToString.Exclude private UnresolvedPlan child; - /** StreamWindow Constructor. 
*/ - public StreamWindow( - List<UnresolvedExpression> windowFunctionList, - List<UnresolvedExpression> groupList, - boolean current, - int window, - boolean global, - UnresolvedExpression resetBefore, - UnresolvedExpression resetAfter) { - this.windowFunctionList = windowFunctionList; - this.groupList = groupList; - this.current = current; - this.window = window; - this.global = global; - this.resetBefore = resetBefore; - this.resetAfter = resetAfter; - } - - public boolean isCurrent() { - return current; - } - - public boolean isGlobal() { - return global; - } - @Override public StreamWindow attach(UnresolvedPlan child) { this.child = child; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 82651861735..1e0c994c182 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -1125,8 +1125,7 @@ private Pair, List> resolveAttributesForAggregation( @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); - Boolean bucketNullable = - (Boolean) statsArgs.getOrDefault(Argument.BUCKET_NULLABLE, Literal.TRUE).getValue(); + Boolean bucketNullable = (Boolean) statsArgs.get(Argument.BUCKET_NULLABLE).getValue(); int nGroup = node.getGroupExprList().size() + (Objects.nonNull(node.getSpan()) ? 1 : 0); BitSet nonNullGroupMask = new BitSet(nGroup); if (!bucketNullable) { @@ -1742,20 +1741,25 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(streamSeq); - // construct groupNotNull predicate - List<RexNode> groupByList = - groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); - List<RexNode> notNullList = - PlanUtils.getSelectColumns(groupByList).stream() - .map(context.relBuilder::field) - .map(context.relBuilder::isNotNull) - .toList(); - RexNode groupNotNull = context.relBuilder.and(notNullList); + if (!node.isBucketNullable()) { + // construct groupNotNull predicate + List<RexNode> groupByList = + groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); + List<RexNode> notNullList = + PlanUtils.getSelectColumns(groupByList).stream() + .map(context.relBuilder::field) + .map(context.relBuilder::isNotNull) + .toList(); + RexNode groupNotNull = context.relBuilder.and(notNullList); + + // wrap each expr: CASE WHEN groupNotNull THEN rawExpr ELSE CAST(NULL AS rawType) END + List<RexNode> wrappedOverExprs = + wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context); + context.relBuilder.projectPlus(wrappedOverExprs); + } else { + context.relBuilder.projectPlus(overExpressions); + } - // wrap each expr: CASE WHEN groupNotNull THEN rawExpr ELSE CAST(NULL AS rawType) END - List<RexNode> wrappedOverExprs = - wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context); - context.relBuilder.projectPlus(wrappedOverExprs); // resort when there is by condition context.relBuilder.sort(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); @@ -1811,11 +1815,11 @@ private RelNode buildStreamWindowJoinPlan( RexNode segRight = context.relBuilder.field(segmentCol); RexNode segOuter = context.relBuilder.field(v.get(), segmentCol); RexNode frame = buildResetFrameFilter(context, node, outerSeq, rightSeq, segOuter,
segRight); - RexNode group = buildGroupFilter(context, groupList, v.get()); + RexNode group = buildGroupFilter(context, node, groupList, v.get()); filter = (group == null) ? frame : context.relBuilder.and(frame, group); } else { // global + window + by condition RexNode frame = buildFrameFilter(context, node, outerSeq, rightSeq); - RexNode group = buildGroupFilter(context, groupList, v.get()); + RexNode group = buildGroupFilter(context, node, groupList, v.get()); filter = context.relBuilder.and(frame, group); } context.relBuilder.filter(filter); @@ -1965,7 +1969,10 @@ private RexNode buildResetFrameFilter( } private RexNode buildGroupFilter( - CalcitePlanContext context, List<UnresolvedExpression> groupList, RexCorrelVariable correl) { + CalcitePlanContext context, + StreamWindow node, + List<UnresolvedExpression> groupList, + RexCorrelVariable correl) { // build conjunctive equality filters: right.g_i = outer.g_i if (groupList.isEmpty()) { return null; } @@ -1977,7 +1984,17 @@ private RexNode buildGroupFilter( String groupName = extractGroupFieldName(expr); RexNode rightGroup = context.relBuilder.field(groupName); RexNode outerGroup = context.relBuilder.field(correl, groupName); - return context.relBuilder.equals(rightGroup, outerGroup); + RexNode equalCondition = context.relBuilder.equals(rightGroup, outerGroup); + // handle bucket_nullable case + if (!node.isBucketNullable()) { + return equalCondition; + } else { + RexNode bothNull = + context.relBuilder.and( + context.relBuilder.isNull(rightGroup), + context.relBuilder.isNull(outerGroup)); + return context.relBuilder.or(equalCondition, bothNull); + } }) .toList(); return context.relBuilder.and(equalsList); diff --git a/docs/user/ppl/cmd/streamstats.rst b/docs/user/ppl/cmd/streamstats.rst index ccda3383277..e38df779791 100644 --- a/docs/user/ppl/cmd/streamstats.rst +++ b/docs/user/ppl/cmd/streamstats.rst @@ -50,9 +50,14 @@ All of these commands can be used to generate aggregations such as average, sum, Syntax ====== -streamstats [current=<boolean>] [window=<int>] [global=<boolean>] [reset_before="("<eval-expression>")"] [reset_after="("<eval-expression>")"] <function>... [by-clause] +streamstats [bucket_nullable=bool] [current=<boolean>] [window=<int>] [global=<boolean>] [reset_before="("<eval-expression>")"] [reset_after="("<eval-expression>")"] <function>... [by-clause] -* function: mandatory. A aggregation function or window function. +* function: mandatory. An aggregation function or window function. +* bucket_nullable: optional. Controls whether the streamstats command considers null buckets as a valid group in group-by aggregations. When set to ``false``, it will not treat null group-by values as a distinct group during aggregation. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. + + * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true`` + * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false`` + * current: optional. If true, the search includes the given, or current, event in the summary calculations. If false, the search uses the field value from the previous event. Syntax: current=<boolean>. **Default:** true. * window: optional. Specifies the number of events to use when computing the statistics. Syntax: window=<int>. **Default:** 0, which means that all previous and current events are used. * global: optional. Used only when the window argument is set. Defines whether to use a single window, global=true, or to use separate windows based on the by clause. If global=false and window is set to a non-zero value, a separate window is used for each group of values of the field specified in the by clause. Syntax: global=<boolean>. **Default:** true.
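The ``bucket_nullable`` behavior comes down to null-safe equality: ``buildGroupFilter`` above emits a plain ``=`` when ``bucket_nullable=false``, and ``OR(=, AND(IS NULL, IS NULL))`` (SQL's ``IS NOT DISTINCT FROM``, as the physical plans further below show) when it is ``true``, so two null bucket keys match each other only in the nullable case. A minimal plain-Java sketch of the same matching rule, on ordinary values rather than RexNodes (illustrative helper only, not part of the patch)::

    // Equal keys always share a bucket; a pair of null keys shares the
    // null bucket only when bucket_nullable=true. With bucket_nullable=false
    // a null key matches nothing, so null groups drop out of the aggregate.
    static boolean sameBucket(Object right, Object outer, boolean bucketNullable) {
      if (right == null && outer == null) {
        return bucketNullable;
      }
      // SQL-style equality: a comparison involving a single null never matches.
      return right != null && right.equals(outer);
    }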
@@ -235,4 +240,34 @@ PPL query:: | Peter | Canada | B.C | 4 | 2023 | 57 | null | | Rick | Canada | B.C | 4 | 2023 | 70 | null | | David | USA | Washington | 4 | 2023 | 40 | null | - +-------+---------+------------+-------+------+-----+---------+ \ No newline at end of file + +-------+---------+------------+-------+------+-----+---------+ + + +Example 5: Null buckets handling +================================ + +PPL query:: + + os> source=accounts | streamstats bucket_nullable=false count() as cnt by employer | fields account_number, firstname, employer, cnt; + fetched rows / total rows = 4/4 + +----------------+-----------+----------+------+ + | account_number | firstname | employer | cnt | + |----------------+-----------+----------+------| + | 1 | Amber | Pyrami | 1 | + | 6 | Hattie | Netagy | 1 | + | 13 | Nanette | Quility | 1 | + | 18 | Dale | null | null | + +----------------+-----------+----------+------+ + +PPL query:: + + os> source=accounts | streamstats bucket_nullable=true count() as cnt by employer | fields account_number, firstname, employer, cnt; + fetched rows / total rows = 4/4 + +----------------+-----------+----------+-----+ + | account_number | firstname | employer | cnt | + |----------------+-----------+----------+-----| + | 1 | Amber | Pyrami | 1 | + | 6 | Hattie | Netagy | 1 | + | 13 | Nanette | Quility | 1 | + | 18 | Dale | null | 1 | + +----------------+-----------+----------+-----+ \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 06993dab289..3814da35540 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -667,6 +667,36 @@ public void testStreamstatsResetExplain() throws IOException { assertYamlEqualsIgnoreId(expected, result); } + @Test + public void testStreamstatsNullBucketExplain() throws IOException { + String query = + "source=opensearch-sql_test_index_account | streamstats bucket_nullable=false avg(age) as" + + " avg_age by gender"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_streamstats_null_bucket.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testStreamstatsGlobalNullBucketExplain() throws IOException { + String query = + "source=opensearch-sql_test_index_account | streamstats bucket_nullable=false window=2" + + " global=true avg(age) as avg_age by gender"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_streamstats_global_null_bucket.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + + @Test + public void testStreamstatsResetNullBucketExplain() throws IOException { + String query = + "source=opensearch-sql_test_index_account | streamstats bucket_nullable=false current=false" + + " reset_before=age>34 reset_after=age<25 avg(age) as avg_age by gender"; + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_streamstats_reset_null_bucket.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } + @Test public void testKeywordILikeFunctionExplain() throws IOException { // ilike is only supported in v3 diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java index 
ae1fb4f2b1c..dcf36f510bf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java @@ -147,7 +147,7 @@ public void testStreamstatsByWithNull() throws IOException { rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), rows(null, "Canada", null, 4, 2023, 10, 3, 18.333333333333332, 10, 25), - rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); + rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); actual = executeQuery( @@ -155,6 +155,53 @@ public void testStreamstatsByWithNull() throws IOException { "source=%s | streamstats count() as cnt, avg(age) as avg, min(age) as min, max(age)" + " as max by state", TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 1, 30, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 1, 20, 20, 20), + rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), + rows("Kevin", null, null, 4, 2023, null, 2, 10, 10, 10)); + } + + @Test + public void testStreamstatsByWithNullBucket() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=false count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifySchemaInOrder( + actual, + schema("name", "string"), + schema("country", "string"), + schema("state", "string"), + schema("month", "int"), + schema("year", "int"), + schema("age", "int"), + schema("cnt", "bigint"), + schema("avg", "double"), + schema("min", "int"), + schema("max", "int")); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 2, 50, 30, 70), + rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), + rows(null, "Canada", null, 4, 2023, 10, 3, 18.333333333333332, 10, 25), + rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); + + actual = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=false count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by state", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); verifyDataRows( actual, rows("Jake", "USA", "California", 4, 2023, 70, 1, 70, 70, 70), @@ -198,7 +245,7 @@ public void testStreamstatsBySpanWithNull() throws IOException { rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), - rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); + rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); } @Test @@ -240,8 +287,8 @@ public void testStreamstatsByMultiplePartitionsWithNull1() throws IOException { JSONObject actual = executeQuery( String.format( - "source=%s | streamstats count() as cnt, avg(age) as avg, min(age) as min, max(age)" - + " as max by span(age, 10) as age_span, country", + "source=%s | streamstats bucket_nullable=false count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by span(age, 10) as age_span, country", TEST_INDEX_STATE_COUNTRY_WITH_NULL)); 
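+      // With bucket_nullable=false, rows whose group-by key is null (state is null for the unnamed Canada row and for Kevin) are expected to yield null cnt/avg/min/max here rather than forming a shared null bucket.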
verifyDataRows( @@ -252,6 +299,22 @@ public void testStreamstatsByMultiplePartitionsWithNull1() throws IOException { rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=true count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by span(age, 10) as age_span, country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 1, 30, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 2, 22.5, 20, 25), + rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), + rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); } @Test @@ -259,8 +322,8 @@ public void testStreamstatsByMultiplePartitionsWithNull2() throws IOException { JSONObject actual = executeQuery( String.format( - "source=%s | streamstats count() as cnt, avg(age) as avg, min(age) as min, max(age)" - + " as max by span(age, 10) as age_span, state", + "source=%s | streamstats bucket_nullable=false count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by span(age, 10) as age_span, state", TEST_INDEX_STATE_COUNTRY_WITH_NULL)); verifyDataRows( @@ -271,6 +334,22 @@ public void testStreamstatsByMultiplePartitionsWithNull2() throws IOException { rows("Jane", "Canada", "Quebec", 4, 2023, 20, 1, 20, 20, 20), rows(null, "Canada", null, 4, 2023, 10, null, null, null, null), rows("Kevin", null, null, 4, 2023, null, null, null, null, null)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=true count() as cnt, avg(age) as avg," + + " min(age) as min, max(age) as max by span(age, 10) as age_span, state", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 1, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 1, 30, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 1, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 1, 20, 20, 20), + rows(null, "Canada", null, 4, 2023, 10, 1, 10, 10, 10), + rows("Kevin", null, null, 4, 2023, null, 1, null, null, null)); } @Test @@ -502,6 +581,61 @@ public void testStreamstatsGlobalWithNull() throws IOException { } } + @Test + public void testStreamstatsGlobalWithNullBucket() throws IOException { + final int docId = 7; + Request insertRequest = + new Request( + "PUT", + String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); + insertRequest.setJsonEntity( + "{\"name\": \"Jay\",\"age\": 40,\"state\":" + + " \"Quebec\",\"country\": \"USA\",\"year\": 2023,\"month\":" + + " 4}\n"); + client().performRequest(insertRequest); + try { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=false window=2 global=true avg(age) as" + + " avg by state", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), + rows(null, "Canada", null, 4, 2023, 10, null), + rows("Kevin", null, null, 4, 2023, null, null), + rows("Jay", 
"USA", "Quebec", 4, 2023, 40, 40)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=true window=2 global=true avg(age) as" + + " avg by state", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), + rows(null, "Canada", null, 4, 2023, 10, 10), + rows("Kevin", null, null, 4, 2023, null, 10), + rows("Jay", "USA", "Quebec", 4, 2023, 40, 40)); + } finally { + Request deleteRequest = + new Request( + "DELETE", + String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); + client().performRequest(deleteRequest); + } + } + @Test public void testStreamstatsReset() throws IOException { final int docId = 5; @@ -602,6 +736,61 @@ public void testStreamstatsResetWithNull() throws IOException { } } + @Test + public void testStreamstatsResetWithNullBucket() throws IOException { + final int docId = 7; + Request insertRequest = + new Request( + "PUT", + String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); + insertRequest.setJsonEntity( + "{\"name\": \"Jay\",\"age\": 28,\"state\":" + + " \"Quebec\",\"country\": \"USA\",\"year\": 2023,\"month\":" + + " 4}\n"); + client().performRequest(insertRequest); + try { + JSONObject actual = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=true window=2 reset_before=age>29" + + " avg(age) as avg by state", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), + rows(null, "Canada", null, 4, 2023, 10, 10), + rows("Kevin", null, null, 4, 2023, null, 10), + rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=false window=2 reset_after=age>22" + + " avg(age) as avg by state", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20), + rows(null, "Canada", null, 4, 2023, 10, null), + rows("Kevin", null, null, 4, 2023, null, null), + rows("Jay", "USA", "Quebec", 4, 2023, 28, 28)); + } finally { + Request deleteRequest = + new Request( + "DELETE", + String.format("/%s/_doc/%d?refresh=true", TEST_INDEX_STATE_COUNTRY_WITH_NULL, docId)); + client().performRequest(deleteRequest); + } + } + @Test public void testUnsupportedWindowFunctions() { List unsupported = List.of("PERCENTILE_APPROX", "PERCENTILE"); @@ -649,6 +838,23 @@ public void testMultipleStreamstatsWithNull1() throws IOException { rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), + rows(null, "Canada", null, 4, 2023, 10, 10, 18.333333333333332), + rows("Kevin", null, null, 4, 2023, null, null, null)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=false avg(age) as avg_age by state," + + " country | streamstats bucket_nullable=false 
avg(avg_age) as avg_state_age" + + " by country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), rows(null, "Canada", null, 4, 2023, 10, null, 22.5), rows("Kevin", null, null, 4, 2023, null, null, null)); } @@ -678,6 +884,22 @@ public void testMultipleStreamstatsWithNull2() throws IOException { rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), + rows("Jay", "USA", null, 4, 2023, 28, 28, 42.666666666666664)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | streamstats bucket_nullable=false avg(age) as avg_age by state," + + " country | streamstats bucket_nullable=false avg(avg_age) as avg_state_age" + + " by country", + TEST_INDEX_STATE_COUNTRY)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, 70, 70), + rows("Hello", "USA", "New York", 4, 2023, 30, 30, 50), + rows("John", "Canada", "Ontario", 4, 2023, 25, 25, 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, 20, 22.5), rows("Jay", "USA", null, 4, 2023, 28, null, 50)); } finally { Request deleteRequest = @@ -790,7 +1012,7 @@ public void testMultipleStreamstatsWithEval2() throws IOException { executeQuery( String.format( "source=%s | eval new_state=lower(state), new_country=lower(country) | streamstats" - + " avg(age) as avg_age by new_state, new_country", + + " bucket_nullable=false avg(age) as avg_age by new_state, new_country", TEST_INDEX_STATE_COUNTRY_WITH_NULL)); verifySchemaInOrder( @@ -813,6 +1035,22 @@ public void testMultipleStreamstatsWithEval2() throws IOException { rows("Jane", "Canada", "Quebec", 4, 2023, 20, "quebec", "canada", 20), rows(null, "Canada", null, 4, 2023, 10, null, "canada", null), rows("Kevin", null, null, 4, 2023, null, null, null, null)); + + JSONObject actual2 = + executeQuery( + String.format( + "source=%s | eval new_state=lower(state), new_country=lower(country) | streamstats" + + " bucket_nullable=true avg(age) as avg_age by new_state, new_country", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + + verifyDataRows( + actual2, + rows("Jake", "USA", "California", 4, 2023, 70, "california", "usa", 70), + rows("Hello", "USA", "New York", 4, 2023, 30, "new york", "usa", 30), + rows("John", "Canada", "Ontario", 4, 2023, 25, "ontario", "canada", 25), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "quebec", "canada", 20), + rows(null, "Canada", null, 4, 2023, 10, null, "canada", 10), + rows("Kevin", null, null, 4, 2023, null, null, null, null)); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml index c9ef1ca9ebd..32538ab17df 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml @@ -3,14 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - 
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[CASE(IS NOT NULL($4), DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:BIGINT)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..13=[{inputs}], expr#14=[null:BIGINT], expr#15=[CASE($t12, $t13, $t14)], proj#0..10=[{exprs}], distinct_states=[$t15]) + EnumerableCalc(expr#0..12=[{inputs}], proj#0..10=[{exprs}], distinct_states=[$t12]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$11], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t4)], proj#0..12=[{exprs}]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml index aac0fab3748..cac21b929ee 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml @@ -3,14 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], 
earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($1), ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)], latest_message=[CASE(IS NOT NULL($1), ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) + EnumerableCalc(expr#0..7=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t6], latest_message=[$t7]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$5], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[IS NOT NULL($t1)], proj#0..6=[{exprs}]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml index e86cfb8236c..f19625d85e5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml @@ -3,14 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - 
LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($4), ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:VARCHAR)], latest_message=[CASE(IS NOT NULL($4), ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) + EnumerableCalc(expr#0..7=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t6], latest_message=[$t7]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$5], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[IS NOT NULL($t4)], proj#0..6=[{exprs}]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml index 293dd785f96..a00d5b40cfa 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global.yaml @@ -7,23 +7,24 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], 
avg_age=[AVG($8)]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, $cor0.gender))]) + LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) EnumerableLimit(fetch=[10000]) - EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[1], expr#13=[-($t11, $t12)], proj#0..11=[{exprs}], $f12=[$t13]) + EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) + EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[1], expr#13=[-($t11, $t12)], expr#14=[IS NULL($t4)], proj#0..11=[{exprs}], $f12=[$t13], $f13=[$t14]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t1, $t2)], proj#0..1=[{exprs}], $f2=[$t3]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], 
dir2=[ASC]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) + EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) + EnumerableNestedLoopJoin(condition=[AND(>=($6, $2), <=($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2, 3}]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t1, $t2)], expr#4=[IS NULL($t0)], proj#0..1=[{exprs}], $f2=[$t3], $f3=[$t4]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml new file mode 100644 index 00000000000..293dd785f96 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_global_null_bucket.yaml @@ -0,0 +1,29 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) + LogicalSort(sort0=[$17], dir0=[ASC]) + LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{}], avg_age=[AVG($8)]) + LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, $cor0.gender))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableLimit(fetch=[10000]) + EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) + EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[1], expr#13=[-($t11, $t12)], proj#0..11=[{exprs}], 
$f12=[$t13]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) + EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2}]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t1, $t2)], proj#0..1=[{exprs}], $f2=[$t3]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml new file mode 100644 index 00000000000..fe79eb90faa --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml @@ -0,0 +1,16 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) + LogicalSort(sort0=[$17], dir0=[ASC]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], avg_age=[CASE(IS NOT NULL($4), /(SUM($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), CAST(COUNT($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)):DOUBLE NOT NULL), null:DOUBLE)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + 
EnumerableCalc(expr#0..14=[{inputs}], expr#15=[CAST($t14):DOUBLE NOT NULL], expr#16=[/($t13, $t15)], expr#17=[null:DOUBLE], expr#18=[CASE($t12, $t16, $t17)], proj#0..10=[{exprs}], avg_age=[$t18]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($8), COUNT($8)])]) + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t4)], proj#0..12=[{exprs}]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml index 0e8ed3a3dde..fd739ac5cf5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml @@ -8,31 +8,32 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], avg_age=[AVG($8)]) - LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), =($4, $cor0.gender))]) + LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) EnumerableLimit(fetch=[10000]) - EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - 
EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[0], expr#17=[COALESCE($t15, $t16)], expr#18=[+($t14, $t17)], proj#0..11=[{exprs}], __seg_id__=[$t18]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[34], expr#13=[>($t8, $t12)], expr#14=[1], expr#15=[0], expr#16=[CASE($t13, $t14, $t15)], expr#17=[25], expr#18=[<($t8, $t17)], expr#19=[CASE($t18, $t14, $t15)], proj#0..11=[{exprs}], __reset_before_flag__=[$t16], __reset_after_flag__=[$t19]) + EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) + EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[0], expr#18=[COALESCE($t16, $t17)], expr#19=[+($t15, $t18)], proj#0..11=[{exprs}], __seg_id__=[$t19], $f16=[$t14]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $15 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[34], expr#13=[>($t8, $t12)], expr#14=[1], expr#15=[0], expr#16=[CASE($t13, $t14, $t15)], expr#17=[25], expr#18=[<($t8, $t17)], expr#19=[CASE($t18, $t14, $t15)], expr#20=[IS NULL($t4)], proj#0..11=[{exprs}], __reset_before_flag__=[$t16], __reset_after_flag__=[$t19], $14=[$t20]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($2, $6), =($0, $3), <($5, $1))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[COALESCE($t5, $t6)], expr#8=[+($t4, $t7)], proj#0..1=[{exprs}], __seg_id__=[$t8]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], gender=[$t0], __stream_seq__=[$t2], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) + EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], 
avg_age=[$t11]) + EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) + EnumerableHashJoin(condition=[AND(=($2, $7), <($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2, 3}]) + EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..1=[{exprs}], __seg_id__=[$t9], $f16=[$t4]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], expr#11=[IS NULL($t0)], gender=[$t0], __stream_seq__=[$t2], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10], $4=[$t11]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], proj#0..2=[{exprs}], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], proj#0..2=[{exprs}], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml new file mode 100644 index 00000000000..0e8ed3a3dde --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml @@ -0,0 +1,38 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$21]) + LogicalSort(sort0=[$17], dir0=[ASC]) + LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17, 20}]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{}], avg_age=[AVG($8)]) + LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), =($4, $cor0.gender))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableLimit(fetch=[10000]) + EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) + EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[0], expr#17=[COALESCE($t15, $t16)], expr#18=[+($t14, $t17)], proj#0..11=[{exprs}], __seg_id__=[$t18]) + EnumerableWindow(window#0=[window(rows between 
UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableCalc(expr#0..11=[{inputs}], expr#12=[34], expr#13=[>($t8, $t12)], expr#14=[1], expr#15=[0], expr#16=[CASE($t13, $t14, $t15)], expr#17=[25], expr#18=[<($t8, $t17)], expr#19=[CASE($t18, $t14, $t15)], proj#0..11=[{exprs}], __reset_before_flag__=[$t16], __reset_after_flag__=[$t19]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) + EnumerableHashJoin(condition=[AND(=($2, $6), =($0, $3), <($5, $1))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2}]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[COALESCE($t5, $t6)], expr#8=[+($t4, $t7)], proj#0..1=[{exprs}], __seg_id__=[$t8]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], gender=[$t0], __stream_seq__=[$t2], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], proj#0..2=[{exprs}], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, 
startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml index f04a18c5f16..550cf0ea9cb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml @@ -3,14 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[CASE(IS NOT NULL($4), DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:BIGINT)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..13=[{inputs}], expr#14=[null:BIGINT], expr#15=[CASE($t12, $t13, $t14)], proj#0..10=[{exprs}], distinct_states=[$t15]) + EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], distinct_states=[$t18]) EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableSort(sort0=[$17], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NOT NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $12=[$t18]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml index af7d996dfb8..c37fae48771 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml @@ -3,14 
+3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($1), ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)], latest_message=[CASE(IS NOT NULL($1), ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) + EnumerableCalc(expr#0..13=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t12], latest_message=[$t13]) EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$5], dir0=[ASC]) + EnumerableSort(sort0=[$11], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t1)], proj#0..4=[{exprs}], __stream_seq__=[$t11], $6=[$t12]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml index c5c4eec782f..b85e4b6b7bb 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml @@ -3,14 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[CASE(IS NOT NULL($4), ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), 
null:VARCHAR)], latest_message=[CASE(IS NOT NULL($4), ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), null:VARCHAR)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - EnumerableCalc(expr#0..8=[{inputs}], expr#9=[null:VARCHAR], expr#10=[CASE($t6, $t7, $t9)], expr#11=[CASE($t6, $t8, $t9)], proj#0..4=[{exprs}], earliest_message=[$t10], latest_message=[$t11]) + EnumerableCalc(expr#0..13=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t12], latest_message=[$t13]) EnumerableLimit(fetch=[10000]) - EnumerableSort(sort0=[$5], dir0=[ASC]) + EnumerableSort(sort0=[$11], dir0=[ASC]) EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) - EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t4)], proj#0..4=[{exprs}], __stream_seq__=[$t11], $6=[$t12]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml index 3ac52e02f55..191bd987a16 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global.yaml @@ -7,24 +7,25 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], avg_age=[AVG($8)]) - LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, $cor0.gender))]) + LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], 
avg_age=[$t16]) + EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) EnumerableLimit(fetch=[10000]) - EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $f12=[$t19]) + EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) + EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], expr#20=[IS NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $f12=[$t19], $f15=[$t20]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], gender=[$t4], __stream_seq__=[$t17], $f12=[$t19]) + EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) + EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) + EnumerableNestedLoopJoin(condition=[AND(>=($6, $2), <=($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2, 3}]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], expr#20=[IS NULL($t4)], gender=[$t4], __stream_seq__=[$t17], $f12=[$t19], $f15=[$t20]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..17=[{inputs}], gender=[$t4], age=[$t8], $2=[$t17]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..17=[{inputs}], gender=[$t4], age=[$t8], $2=[$t17]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml new file mode 100644 index 00000000000..3ac52e02f55 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_global_null_bucket.yaml @@ -0,0 +1,30 @@ 
+calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) + LogicalSort(sort0=[$17], dir0=[ASC]) + LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17}]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{}], avg_age=[AVG($8)]) + LogicalFilter(condition=[AND(>=($17, -($cor0.__stream_seq__, 1)), <=($17, $cor0.__stream_seq__), =($4, $cor0.gender))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableLimit(fetch=[10000]) + EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) + EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $f12=[$t19]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) + EnumerableHashJoin(condition=[AND(=($0, $3), >=($5, $2), <=($5, $1))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2}]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[-($t17, $t18)], gender=[$t4], __stream_seq__=[$t17], $f12=[$t19]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..17=[{inputs}], gender=[$t4], age=[$t8], $2=[$t17]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml new file mode 100644 index 00000000000..08876045225 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml @@ -0,0 +1,16 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], 
employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) + LogicalSort(sort0=[$17], dir0=[ASC]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], avg_age=[CASE(IS NOT NULL($4), /(SUM($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), CAST(COUNT($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)):DOUBLE NOT NULL), null:DOUBLE)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..14=[{inputs}], expr#15=[CAST($t14):DOUBLE NOT NULL], expr#16=[/($t13, $t15)], expr#17=[null:DOUBLE], expr#18=[CASE($t12, $t16, $t17)], proj#0..10=[{exprs}], avg_age=[$t18]) + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($8), COUNT($8)])]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NOT NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $12=[$t18]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml index be28e9b1d8c..7ca329dac6a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml @@ -8,31 +8,32 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], avg_age=[AVG($8)]) - LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), =($4, $cor0.gender))]) + LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], 
city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t18]) EnumerableLimit(fetch=[10000]) - EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) - EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableCalc(expr#0..15=[{inputs}], expr#16=[0], expr#17=[COALESCE($t15, $t16)], expr#18=[+($t14, $t17)], proj#0..11=[{exprs}], __seg_id__=[$t18]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], proj#0..10=[{exprs}], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) + EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) + EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[0], expr#18=[COALESCE($t16, $t17)], expr#19=[+($t15, $t18)], proj#0..11=[{exprs}], __seg_id__=[$t19], $f16=[$t14]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $15 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], expr#26=[IS NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25], $14=[$t26]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) - EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) - EnumerableHashJoin(condition=[AND(=($2, $6), =($0, $3), <($5, $1))], joinType=[inner]) - EnumerableAggregate(group=[{0, 1, 2}]) - EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[COALESCE($t5, $t6)], expr#8=[+($t4, $t7)], proj#0..1=[{exprs}], __seg_id__=[$t8]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], __stream_seq__=[$t17], 
__reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) + EnumerableSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t4)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t5)], proj#0..3=[{exprs}], avg_age=[$t11]) + EnumerableAggregate(group=[{0, 1, 2, 3}], agg#0=[$SUM0($5)], agg#1=[COUNT($5)]) + EnumerableHashJoin(condition=[AND(=($2, $7), <($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2, 3}]) + EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..1=[{exprs}], __seg_id__=[$t9], $f16=[$t4]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], expr#26=[IS NULL($t4)], gender=[$t4], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25], $4=[$t26]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], age=[$t8], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) - EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) - EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], age=[$t8], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml new file mode 100644 index 00000000000..be28e9b1d8c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml @@ -0,0 +1,38 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$21]) + LogicalSort(sort0=[$17], dir0=[ASC]) + LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17, 20}]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalAggregate(group=[{}], avg_age=[AVG($8)]) + LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), =($4, $cor0.gender))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..16=[{inputs}], proj#0..10=[{exprs}], avg_age=[$t16]) + EnumerableLimit(fetch=[10000]) + EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) + EnumerableSort(sort0=[$11], dir0=[ASC]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[0], expr#17=[COALESCE($t15, $t16)], expr#18=[+($t14, $t17)], proj#0..11=[{exprs}], __seg_id__=[$t18]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], 
expr#25=[CASE($t24, $t20, $t21)], proj#0..10=[{exprs}], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], proj#0..2=[{exprs}], avg_age=[$t10]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($4)], agg#1=[COUNT($4)]) + EnumerableHashJoin(condition=[AND(=($2, $6), =($0, $3), <($5, $1))], joinType=[inner]) + EnumerableAggregate(group=[{0, 1, 2}]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[COALESCE($t5, $t6)], expr#8=[+($t4, $t7)], proj#0..1=[{exprs}], __seg_id__=[$t8]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], age=[$t8], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 378be713160..0a38820a1bb 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -263,7 +263,7 @@ streamstatsCommand ; streamstatsArgs - : (currentArg | windowArg | globalArg | resetBeforeArg | resetAfterArg)* + : (currentArg | windowArg | globalArg | resetBeforeArg | resetAfterArg | bucketNullableArg)* ; currentArg diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index cf674131d92..c83298edeab 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -486,8 +486,7 @@ public UnresolvedPlan visitEventstatsCommand(OpenSearchPPLParser.EventstatsComma ArgumentMap arguments = ArgumentMap.of(argExprList); // bucket_nullable - boolean bucketNullable = - (Boolean) arguments.getOrDefault(Argument.BUCKET_NULLABLE, 
Literal.TRUE).getValue(); + boolean bucketNullable = (Boolean) arguments.get(Argument.BUCKET_NULLABLE).getValue(); // 2. Build groupList List<UnresolvedExpression> groupList = getPartitionExprList(ctx.statsByClause()); @@ -514,13 +513,14 @@ public UnresolvedPlan visitEventstatsCommand(OpenSearchPPLParser.EventstatsComma /** Streamstats command. */ public UnresolvedPlan visitStreamstatsCommand(OpenSearchPPLParser.StreamstatsCommandContext ctx) { // 1. Parse arguments from the streamstats command - List<Argument> argExprList = ArgumentFactory.getArgumentList(ctx); + List<Argument> argExprList = ArgumentFactory.getArgumentList(ctx, settings); ArgumentMap arguments = ArgumentMap.of(argExprList); - // current, window and global from ArgumentFactory + // current, window, global and bucket_nullable from ArgumentFactory boolean current = (Boolean) arguments.get("current").getValue(); int window = (Integer) arguments.get("window").getValue(); boolean global = (Boolean) arguments.get("global").getValue(); + boolean bucketNullable = (Boolean) arguments.get(Argument.BUCKET_NULLABLE).getValue(); if (window < 0) { throw new IllegalArgumentException("Window size must be >= 0, but got: " + window); @@ -571,6 +571,7 @@ public UnresolvedPlan visitStreamstatsCommand(OpenSearchPPLParser.StreamstatsCom current, window, global, + bucketNullable, resetBeforeExpr, resetAfterExpr); } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 41b1d7c2490..f3310ca8d3f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -95,7 +95,7 @@ public static List<Argument> getArgumentList( * @param ctx StreamstatsCommandContext instance * @return the list of arguments fetched from the streamstats command */ - public static List<Argument> getArgumentList(StreamstatsCommandContext ctx) { + public static List<Argument> getArgumentList(StreamstatsCommandContext ctx, Settings settings) { return Arrays.asList( ctx.streamstatsArgs().currentArg() != null && !ctx.streamstatsArgs().currentArg().isEmpty() ? new Argument("current", getArgumentValue(ctx.streamstatsArgs().currentArg(0).current)) @@ -105,7 +105,15 @@ public static List<Argument> getArgumentList(StreamstatsCommandContext ctx) { : new Argument("window", new Literal(0, DataType.INTEGER)), ctx.streamstatsArgs().globalArg() != null && !ctx.streamstatsArgs().globalArg().isEmpty() ? new Argument("global", getArgumentValue(ctx.streamstatsArgs().globalArg(0).global)) - : new Argument("global", new Literal(true, DataType.BOOLEAN))); + : new Argument("global", new Literal(true, DataType.BOOLEAN)), + ctx.streamstatsArgs().bucketNullableArg() != null + && !ctx.streamstatsArgs().bucketNullableArg().isEmpty() + ? new Argument( + Argument.BUCKET_NULLABLE, + getArgumentValue(ctx.streamstatsArgs().bucketNullableArg(0).bucket_nullable)) + : new Argument( + Argument.BUCKET_NULLABLE, + UnresolvedPlanHelper.legacyPreferred(settings) ?
Literal.TRUE : Literal.FALSE)); } /** diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index a5853ecba5d..b073453ecbc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -19,6 +19,33 @@ public CalcitePPLStreamstatsTest() { public void testStreamstatsBy() { String ppl = "source=EMP | streamstats max(SAL) by DEPTNO"; RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" + + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" + + " (PARTITION BY $7 ROWS UNBOUNDED PRECEDING)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" + + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)" + + " `max(SAL)`\n" + + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + + " ROW_NUMBER() OVER () `__stream_seq__`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "ORDER BY `__stream_seq__` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testStreamstatsByNullBucket() { + String ppl = "source=EMP | streamstats bucket_nullable=false max(SAL) by DEPTNO"; + RelNode root = getRelNode(ppl); String expectedLogical = "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" @@ -76,7 +103,8 @@ public void testStreamstatsWindow() { + " LogicalTableScan(table=[[scott, EMP]])\n" + " LogicalAggregate(group=[{}], max(SAL)=[MAX($5)])\n" + " LogicalFilter(condition=[AND(>=($8, -($cor0.__stream_seq__, 4)), <=($8," - + " $cor0.__stream_seq__), =($7, $cor0.DEPTNO))])\n" + + " $cor0.__stream_seq__), OR(=($7, $cor0.DEPTNO), AND(IS NULL($7), IS" + + " NULL($cor0.DEPTNO))))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -93,7 +121,8 @@ public void testStreamstatsWindow() { + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t0`\n" + "WHERE `__stream_seq__` >= `$cor0`.`__stream_seq__` - 4 AND `__stream_seq__` <=" - + " `$cor0`.`__stream_seq__` AND `DEPTNO` = `$cor0`.`DEPTNO`) `t2`\n" + + " `$cor0`.`__stream_seq__` AND (`DEPTNO` = `$cor0`.`DEPTNO` OR `DEPTNO` IS NULL AND" + + " `$cor0`.`DEPTNO` IS NULL)) `t2`\n" + "ORDER BY `$cor0`.`__stream_seq__` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -107,17 +136,16 @@ public void testStreamstatsGlobal() { + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[CASE(IS NOT" - + " NULL($7), 
MAX($5) OVER (PARTITION BY $7 ROWS 4 PRECEDING), null:DECIMAL(7, 2))])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" + + " (PARTITION BY $7 ROWS 4 PRECEDING)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, CASE WHEN" - + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ROWS BETWEEN 4" - + " PRECEDING AND CURRENT ROW) ELSE NULL END `max(SAL)`\n" + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" + + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -147,7 +175,8 @@ public void testStreamstatsReset() { + " LogicalTableScan(table=[[scott, EMP]])\n" + " LogicalAggregate(group=[{}], avg(SAL)=[AVG($5)])\n" + " LogicalFilter(condition=[AND(<=($8, $cor0.__stream_seq__), =($11," - + " $cor0.__seg_id__), =($7, $cor0.DEPTNO))])\n" + + " $cor0.__seg_id__), OR(=($7, $cor0.DEPTNO), AND(IS NULL($7), IS" + + " NULL($cor0.DEPTNO))))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], __reset_before_flag__=[$9]," + " __reset_after_flag__=[$10], __seg_id__=[+(SUM($9) OVER (ROWS UNBOUNDED PRECEDING)," @@ -184,7 +213,8 @@ public void testStreamstatsReset() { + " `__reset_after_flag__`\n" + "FROM `scott`.`EMP`) `t1`) `t2`\n" + "WHERE `__stream_seq__` <= `$cor0`.`__stream_seq__` AND `__seg_id__` =" - + " `$cor0`.`__seg_id__` AND `DEPTNO` = `$cor0`.`DEPTNO`) `t4`\n" + + " `$cor0`.`__seg_id__` AND (`DEPTNO` = `$cor0`.`DEPTNO` OR `DEPTNO` IS NULL AND" + + " `$cor0`.`DEPTNO` IS NULL)) `t4`\n" + "ORDER BY `$cor0`.`__stream_seq__` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } From 5c2f50ba558146336d366c1b668cd4de66d823e6 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 26 Nov 2025 14:44:40 +0800 Subject: [PATCH 79/99] Specify timestamp field with `timefield` in timechart command (#4784) * Support param timefield to specify span field in timechart Signed-off-by: Yuanchun Shen * Update doc to introduce timefield parameter Signed-off-by: Yuanchun Shen * Update ASTBuilderTest for chart: default args are handled in rel node visitor Signed-off-by: Yuanchun Shen * Fix ast expression builder test Signed-off-by: Yuanchun Shen * Fix anonymizer test Signed-off-by: Yuanchun Shen * Support using specified timefield in per functions Signed-off-by: Yuanchun Shen * Omit by-timestamp clause in timechart command Signed-off-by: Yuanchun Shen * Mask timefield argument in anonymizer Signed-off-by: Yuanchun Shen * Anonymize argument span Signed-off-by: Yuanchun Shen --------- Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/ast/tree/Chart.java | 8 +-- docs/user/ppl/cmd/timechart.rst | 6 +- .../remote/CalciteTimechartCommandIT.java | 13 ++++- .../remote/CalciteTimechartPerFunctionIT.java | 26 +++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 + .../opensearch/sql/ppl/parser/AstBuilder.java | 51 ++++++----------- .../sql/ppl/parser/AstExpressionBuilder.java | 43 +--------------
.../sql/ppl/utils/ArgumentFactory.java | 55 +++++++++++++++++++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 35 ++++++++---- .../sql/ppl/parser/AstBuilderTest.java | 20 ++----- .../ppl/parser/AstExpressionBuilderTest.java | 44 ++++----------- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 7 ++- 13 files changed, 166 insertions(+), 145 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java index ada20cbde74..2118d90117a 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Chart.java @@ -97,16 +97,14 @@ private UnresolvedPlan transformPerFunction() { PerFunction perFunc = perFuncOpt.get(); // For chart, the rowSplit should contain the span information - UnresolvedExpression spanExpr = rowSplit; - if (rowSplit instanceof Alias) { - spanExpr = ((Alias) rowSplit).getDelegated(); - } + UnresolvedExpression spanExpr = + rowSplit instanceof Alias ? ((Alias) rowSplit).getDelegated() : rowSplit; if (!(spanExpr instanceof Span)) { return this; // Cannot transform without span information } Span span = (Span) spanExpr; - Field spanStartTime = AstDSL.implicitTimestampField(); + Field spanStartTime = (Field) span.getField(); Function spanEndTime = timestampadd(span.getUnit(), span.getValue(), spanStartTime); Function spanMillis = timestampdiff(MILLISECOND, spanStartTime, spanEndTime); final int SECOND_IN_MILLISECOND = 1000; diff --git a/docs/user/ppl/cmd/timechart.rst b/docs/user/ppl/cmd/timechart.rst index f336007d8fc..21ac980d46a 100644 --- a/docs/user/ppl/cmd/timechart.rst +++ b/docs/user/ppl/cmd/timechart.rst @@ -16,7 +16,9 @@ Description Syntax ====== -timechart [span=<time-interval>] [limit=<int>] [useother=<boolean>] <aggregation> [by <field>] +timechart [timefield=<field>] [span=<time-interval>] [limit=<int>] [useother=<boolean>] <aggregation> [by <field>] + +* timefield: optional. Specifies the timestamp field to use for time interval grouping. **Default**: ``@timestamp``. * span: optional. Specifies the time interval for grouping data. **Default:** 1m (1 minute). @@ -92,7 +94,7 @@ Return type: DOUBLE Notes ===== -* The ``timechart`` command requires a timestamp field named ``@timestamp`` in the data. +* The ``timechart`` command requires a timestamp field in the data. By default, it uses the ``@timestamp`` field, but you can specify a different field using the ``timefield`` parameter. * Results are returned in an unpivoted format with separate rows for each time-field combination that has data. * Only combinations with actual data are included in the results - empty combinations are omitted rather than showing null or zero values. * The "top N" values for the ``limit`` parameter are selected based on the sum of values across all time intervals for each distinct field value.
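A brief usage sketch of the new parameter, in the docs' own example style (the ``bank`` index and its ``birthdate`` timestamp field are fixtures borrowed from the integration tests added later in this patch; any index with a timestamp-typed field works)::

    os> source=bank | timechart timefield=birthdate span=1year count()
    os> source=bank | timechart timefield=birthdate span=1month per_day(balance) by gender

The first query buckets documents into one-year spans over ``birthdate`` instead of ``@timestamp``; the second shows that the specified time field also drives per-functions and can be combined with a ``by`` split.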
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java index 3b5c5f55475..73396ab31b9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartCommandIT.java @@ -13,6 +13,7 @@ import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.client.ResponseException; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalciteTimechartCommandIT extends PPLIntegTestCase { @@ -64,7 +65,7 @@ public void testTimechartWithMinuteSpanAndGroupBy() throws IOException { } @Test - public void testTimechartWithoutTimestampField() throws IOException { + public void testTimechartWithoutTimestampField() { Throwable exception = assertThrows( ResponseException.class, @@ -74,6 +75,16 @@ public void testTimechartWithoutTimestampField() throws IOException { verifyErrorMessageContains(exception, "Field [@timestamp] not found."); } + @Test + public void testTimechartWithCustomTimeField() throws IOException { + JSONObject result = + executeQuery( + StringUtils.format( + "source=%s | timechart timefield=birthdate span=1year count()", TEST_INDEX_BANK)); + verifySchema(result, schema("birthdate", "timestamp"), schema("count()", "bigint")); + verifyDataRows(result, rows("2017-01-01 00:00:00", 2), rows("2018-01-01 00:00:00", 5)); + } + @Test public void testTimechartWithMinuteSpanNoGroupBy() throws IOException { JSONObject result = executeQuery("source=events | timechart span=1m avg(cpu_usage)"); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartPerFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartPerFunctionIT.java index 41751376424..b7d072ba6d5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartPerFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTimechartPerFunctionIT.java @@ -5,6 +5,7 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.util.MatcherUtils.closeTo; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -13,6 +14,8 @@ import java.io.IOException; import org.json.JSONObject; import org.junit.jupiter.api.Test; +import org.opensearch.sql.common.utils.StringUtils; +import org.opensearch.sql.legacy.TestsConstants; import org.opensearch.sql.ppl.PPLIntegTestCase; public class CalciteTimechartPerFunctionIT extends PPLIntegTestCase { @@ -24,6 +27,7 @@ public void init() throws Exception { disallowCalciteFallback(); loadIndex(Index.EVENTS_TRAFFIC); + loadIndex(Index.BANK); } @Test @@ -208,4 +212,26 @@ public void testTimechartPerDayWithByClause() throws IOException { rows("2025-09-08 10:02:00", "server1", 43200.0), // 60 * 720 rows("2025-09-08 10:02:00", "server2", 129600.0)); // 180 * 720 } + + @Test + public void testTimechartPerMonthWithSpecifiedSpan() throws IOException { + JSONObject result = + executeQuery( + StringUtils.format( + "source=%s | timechart timefield=birthdate span=1month per_day(balance) by gender", + TestsConstants.TEST_INDEX_BANK)); + verifySchema( + result, + schema("birthdate", "timestamp"), + schema("gender", "string"), + 
schema("per_day(balance)", "double")); + verifyDataRows( + result, + closeTo("2017-10-01 00:00:00", "M", 1265.3225806451612), + closeTo("2017-11-01 00:00:00", "M", 189.53333333333333), + closeTo("2018-06-01 00:00:00", "F", 1094.6), + closeTo("2018-06-01 00:00:00", "M", 547.2666666666667), + closeTo("2018-08-01 00:00:00", "F", 2858.9032258064517), + closeTo("2018-11-01 00:00:00", "M", 139.33333333333334)); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 8abff8b4032..2c976f2fcf0 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -145,6 +145,7 @@ LIMIT: 'LIMIT'; USEOTHER: 'USEOTHER'; OTHERSTR: 'OTHERSTR'; NULLSTR: 'NULLSTR'; +TIMEFIELD: 'TIMEFIELD'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 0a38820a1bb..69fdcafc4c6 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -328,6 +328,7 @@ timechartParameter : LIMIT EQUAL integerLiteral | SPAN EQUAL spanLiteral | USEOTHER EQUAL (booleanLiteral | ident) + | TIMEFIELD EQUAL (ident | stringLiteral) ; spanLiteral @@ -1572,6 +1573,7 @@ searchableKeyWord | SED | MAX_MATCH | OFFSET_FIELD + | TIMEFIELD | patternMethod | patternMode // AGGREGATIONS AND WINDOW diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index c83298edeab..ad1cc6926cf 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -63,8 +63,6 @@ import org.opensearch.sql.ast.expression.SearchAnd; import org.opensearch.sql.ast.expression.SearchExpression; import org.opensearch.sql.ast.expression.SearchGroup; -import org.opensearch.sql.ast.expression.Span; -import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.UnresolvedArgument; import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.expression.WindowFrame; @@ -771,41 +769,28 @@ private List parseAggTerms( /** Timechart command. 
*/ @Override public UnresolvedPlan visitTimechartCommand(OpenSearchPPLParser.TimechartCommandContext ctx) { - UnresolvedExpression binExpression = - AstDSL.span(AstDSL.implicitTimestampField(), AstDSL.intLiteral(1), SpanUnit.m); - Integer limit = 10; - Boolean useOther = true; - // Process timechart parameters - for (OpenSearchPPLParser.TimechartParameterContext paramCtx : ctx.timechartParameter()) { - UnresolvedExpression param = internalVisitExpression(paramCtx); - if (param instanceof Span) { - binExpression = param; - } else if (param instanceof Literal literal) { - if (DataType.BOOLEAN.equals(literal.getType())) { - useOther = (Boolean) literal.getValue(); - } else if (DataType.INTEGER.equals(literal.getType()) - || DataType.LONG.equals(literal.getType())) { - limit = (Integer) literal.getValue(); - } - } - } + List arguments = ArgumentFactory.getArgumentList(ctx, expressionBuilder); + ArgumentMap argMap = ArgumentMap.of(arguments); + Literal spanLiteral = argMap.getOrDefault("spanliteral", AstDSL.stringLiteral("1m")); + String timeFieldName = + Optional.ofNullable(argMap.get("timefield")) + .map(l -> (String) l.getValue()) + .orElse(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP); + Field spanField = AstDSL.field(timeFieldName); + Alias span = + AstDSL.alias(timeFieldName, AstDSL.spanFromSpanLengthLiteral(spanField, spanLiteral)); UnresolvedExpression aggregateFunction = parseAggTerms(List.of(ctx.statsAggTerm())).getFirst(); - UnresolvedExpression byField = - ctx.fieldExpression() != null ? internalVisitExpression(ctx.fieldExpression()) : null; - List arguments = - List.of( - new Argument("limit", AstDSL.intLiteral(limit)), - new Argument("useother", AstDSL.booleanLiteral(useOther))); - binExpression = AstDSL.alias(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP, binExpression); - if (byField != null) { - byField = - AstDSL.alias( - StringUtils.unquoteIdentifier(getTextInQuery(ctx.fieldExpression())), byField); - } + UnresolvedExpression byField = + Optional.ofNullable(ctx.fieldExpression()) + .map( + f -> + AstDSL.alias( + StringUtils.unquoteIdentifier(getTextInQuery(f)), + internalVisitExpression(f))) + .orElse(null); return Chart.builder() .aggregationFunction(aggregateFunction) - .rowSplit(binExpression) + .rowSplit(span) .columnSplit(byField) .arguments(arguments) .build(); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 1a60ca5f2a3..fc4358a81e7 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -757,7 +757,7 @@ public UnresolvedExpression visitMaxOption(OpenSearchPPLParser.MaxOptionContext return new Argument("max", (Literal) this.visit(ctx.integerLiteral())); } - private QualifiedName visitIdentifiers(List ctx) { + public QualifiedName visitIdentifiers(List ctx) { return new QualifiedName( ctx.stream() .map(RuleContext::getText) @@ -995,47 +995,6 @@ public UnresolvedExpression visitTimeModifierValue( return AstDSL.stringLiteral(osDateMathExpression); } - @Override - public UnresolvedExpression visitTimechartParameter( - OpenSearchPPLParser.TimechartParameterContext ctx) { - UnresolvedExpression timechartParameter; - if (ctx.SPAN() != null) { - // Convert span=1h to span(@timestamp, 1h) - Literal spanLiteral = (Literal) visit(ctx.spanLiteral()); - timechartParameter = - AstDSL.spanFromSpanLengthLiteral(AstDSL.implicitTimestampField(), spanLiteral); - } else if (ctx.LIMIT() !=
null) { - Literal limit = (Literal) visit(ctx.integerLiteral()); - if ((Integer) limit.getValue() < 0) { - throw new IllegalArgumentException("Limit must be a non-negative number"); - } - timechartParameter = limit; - } else if (ctx.USEOTHER() != null) { - UnresolvedExpression useOther; - if (ctx.booleanLiteral() != null) { - useOther = visit(ctx.booleanLiteral()); - } else if (ctx.ident() != null) { - QualifiedName ident = visitIdentifiers(List.of(ctx.ident())); - String useOtherValue = ident.toString(); - if ("true".equalsIgnoreCase(useOtherValue) || "t".equalsIgnoreCase(useOtherValue)) { - useOther = AstDSL.booleanLiteral(true); - } else if ("false".equalsIgnoreCase(useOtherValue) || "f".equalsIgnoreCase(useOtherValue)) { - useOther = AstDSL.booleanLiteral(false); - } else { - throw new IllegalArgumentException( - "Invalid useOther value: " + ctx.ident().getText() + ". Expected true/false or t/f"); - } - } else { - throw new IllegalArgumentException("value for useOther must be a boolean or identifier"); - } - timechartParameter = useOther; - } else { - throw new IllegalArgumentException( - String.format("A parameter of timechart must be a span, limit or useOther, got %s", ctx)); - } - return timechartParameter; - } - /** * Process time range expressions (EARLIEST='value' or LATEST='value') It creates a Comparison * filter like @timestamp >= timeModifierValue diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index f3310ca8d3f..ed76b29b77a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -33,6 +33,7 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortFieldContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StreamstatsCommandContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SuffixSortFieldContext; +import org.opensearch.sql.ppl.parser.AstExpressionBuilder; /** Util class to get all arguments as a list from the PPL command. 
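* For example, {@code timechart span=30m limit=5 count()} is collected as the arguments {@code spanliteral="30m"} and {@code limit=5}.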
*/ public class ArgumentFactory { @@ -253,6 +254,60 @@ public static List getArgumentList(ChartCommandContext ctx) { return arguments; } + public static List getArgumentList( + OpenSearchPPLParser.TimechartCommandContext timechartCtx, + AstExpressionBuilder expressionBuilder) { + List arguments = new ArrayList<>(); + for (OpenSearchPPLParser.TimechartParameterContext ctx : timechartCtx.timechartParameter()) { + if (ctx.SPAN() != null) { + arguments.add( + new Argument("spanliteral", (Literal) expressionBuilder.visit(ctx.spanLiteral()))); + } else if (ctx.LIMIT() != null) { + Literal limit = getArgumentValue(ctx.integerLiteral()); + if ((Integer) limit.getValue() < 0) { + throw new IllegalArgumentException("Limit must be a non-negative number"); + } + arguments.add(new Argument("limit", limit)); + } else if (ctx.USEOTHER() != null) { + Literal useOther; + if (ctx.booleanLiteral() != null) { + useOther = getArgumentValue(ctx.booleanLiteral()); + } else if (ctx.ident() != null) { + String identLiteral = expressionBuilder.visitIdentifiers(List.of(ctx.ident())).toString(); + if ("true".equalsIgnoreCase(identLiteral) || "t".equalsIgnoreCase(identLiteral)) { + useOther = AstDSL.booleanLiteral(true); + } else if ("false".equalsIgnoreCase(identLiteral) || "f".equalsIgnoreCase(identLiteral)) { + useOther = AstDSL.booleanLiteral(false); + } else { + throw new IllegalArgumentException( + "Invalid useOther value: " + + ctx.ident().getText() + + ". Expected true/false or t/f"); + } + } else { + throw new IllegalArgumentException("value for useOther must be a boolean or identifier"); + } + arguments.add(new Argument("useother", useOther)); + } else if (ctx.TIMEFIELD() != null) { + Literal timeField; + if (ctx.ident() != null) { + timeField = + AstDSL.stringLiteral( + expressionBuilder.visitIdentifiers(List.of(ctx.ident())).toString()); + } else { + timeField = getArgumentValue(ctx.stringLiteral()); + } + arguments.add(new Argument("timefield", timeField)); + } else { + throw new IllegalArgumentException( + String.format( + "A parameter of timechart must be a span, limit, useother, or timefield, got %s", + ctx)); + } + } + return arguments; + } + /** * Get list of {@link Argument}. 
* diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 271688776e6..04d55992385 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -9,6 +9,7 @@ import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild; import static org.opensearch.sql.utils.QueryStringUtils.MASK_COLUMN; import static org.opensearch.sql.utils.QueryStringUtils.MASK_LITERAL; +import static org.opensearch.sql.utils.QueryStringUtils.MASK_TIMESTAMP_COLUMN; import static org.opensearch.sql.utils.QueryStringUtils.maskField; import com.google.common.base.Strings; @@ -18,8 +19,10 @@ import java.util.List; import java.util.Locale; import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; import lombok.Getter; +import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.AbstractNodeVisitor; @@ -98,6 +101,7 @@ import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; +import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.planner.logical.LogicalAggregation; @@ -513,22 +517,27 @@ public String visitChart(Chart node, String context) { if ("top".equals(argName)) { continue; } - if ("limit".equals(argName) || "useother".equals(argName) || "usenull".equals(argName)) { - chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); - } else if ("otherstr".equals(argName) || "nullstr".equals(argName)) { - chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + + switch (argName) { + case "limit", "useother", "usenull", "otherstr", "nullstr" -> + chartCommand.append(" ").append(argName).append("=").append(MASK_LITERAL); + case "spanliteral" -> chartCommand.append(" span=").append(MASK_LITERAL); + case "timefield" -> + chartCommand.append(" ").append(argName).append("=").append(MASK_TIMESTAMP_COLUMN); + default -> + throw new NotImplementedException( + StringUtils.format("Please implement anonymizer for arg: %s", argName)); } } chartCommand.append(" ").append(visitExpression(node.getAggregationFunction())); if (node.getRowSplit() != null && node.getColumnSplit() != null) { - chartCommand - .append(" by ") - .append(visitExpression(node.getRowSplit())) - .append(" ") - .append(visitExpression(node.getColumnSplit())); - } else if (node.getRowSplit() != null) { + chartCommand.append(" by"); + // the timechart command does not need an explicit by-timestamp field clause + if (!isTimechart) chartCommand.append(" ").append(visitExpression(node.getRowSplit())); + chartCommand.append(" ").append(visitExpression(node.getColumnSplit())); + } else if (node.getRowSplit() != null && !isTimechart) { chartCommand.append(" by ").append(visitExpression(node.getRowSplit())); } else if (node.getColumnSplit() != null) { chartCommand.append(" by ").append(visitExpression(node.getColumnSplit())); @@ -544,8 +553,12 @@ private boolean isTimechartNode(Chart node) { Alias alias = (Alias) node.getRowSplit(); if (alias.getDelegated() instanceof Span) { Span span = (Span) alias.getDelegated(); + String timeFieldName =
+ Optional.ofNullable(ArgumentMap.of(node.getArguments()).get("timefield")) + .map(Literal::toString) + .orElse(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP); return span.getField() instanceof Field - && "@timestamp".equals(((Field) span.getField()).getField().toString()); + && timeFieldName.equals(((Field) span.getField()).getField().toString()); } } return false; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 808fbae9273..5f5a133d3fc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1249,10 +1249,7 @@ public void testTimechartWithPerSecondFunction() { alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) .columnSplit(null) .aggregationFunction(alias("per_second(a)", aggregate("sum", field("a")))) - .arguments( - exprList( - argument("limit", intLiteral(10)), - argument("useother", booleanLiteral(true)))) + .arguments(exprList()) .build(), let( field("per_second(a)"), @@ -1281,10 +1278,7 @@ public void testTimechartWithPerMinuteFunction() { alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) .columnSplit(null) .aggregationFunction(alias("per_minute(a)", aggregate("sum", field("a")))) - .arguments( - exprList( - argument("limit", intLiteral(10)), - argument("useother", booleanLiteral(true)))) + .arguments(exprList()) .build(), let( field("per_minute(a)"), @@ -1313,10 +1307,7 @@ public void testTimechartWithPerHourFunction() { alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) .columnSplit(null) .aggregationFunction(alias("per_hour(a)", aggregate("sum", field("a")))) - .arguments( - exprList( - argument("limit", intLiteral(10)), - argument("useother", booleanLiteral(true)))) + .arguments(exprList()) .build(), let( field("per_hour(a)"), @@ -1345,10 +1336,7 @@ public void testTimechartWithPerDayFunction() { alias("@timestamp", span(field("@timestamp"), intLiteral(1), SpanUnit.of("m")))) .columnSplit(null) .aggregationFunction(alias("per_day(a)", aggregate("sum", field("a")))) - .arguments( - exprList( - argument("limit", intLiteral(10)), - argument("useother", booleanLiteral(true)))) + .arguments(exprList()) .build(), let( field("per_day(a)"), diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 2176e51acbf..b316e461889 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -1423,9 +1423,7 @@ public void testTimechartSpanParameter() { intLiteral(30), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("spanliteral", stringLiteral("30m")))) .build()); } @@ -1443,9 +1441,7 @@ public void testTimechartLimitParameter() { intLiteral(1), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(100)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("limit", intLiteral(100)))) .build()); } @@ -1470,9 +1466,7 @@ public void testTimechartUseOtherWithBooleanLiteral() { 
intLiteral(1), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("useother", booleanLiteral(true)))) .build()); assertEqual( @@ -1487,9 +1481,7 @@ public void testTimechartUseOtherWithBooleanLiteral() { intLiteral(1), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(false)))) + .arguments(exprList(argument("useother", booleanLiteral(false)))) .build()); } @@ -1507,9 +1499,7 @@ public void testTimechartUseOtherWithIdentifier() { intLiteral(1), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("useother", booleanLiteral(true)))) .build()); assertEqual( @@ -1524,9 +1514,7 @@ public void testTimechartUseOtherWithIdentifier() { intLiteral(1), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(false)))) + .arguments(exprList(argument("useother", booleanLiteral(false)))) .build()); assertEqual( @@ -1541,9 +1529,7 @@ public void testTimechartUseOtherWithIdentifier() { intLiteral(1), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("useother", booleanLiteral(true)))) .build()); assertEqual( @@ -1558,9 +1544,7 @@ public void testTimechartUseOtherWithIdentifier() { intLiteral(1), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(false)))) + .arguments(exprList(argument("useother", booleanLiteral(false)))) .build()); } @@ -1634,9 +1618,7 @@ public void testVisitSpanLiteral() { intLiteral(1), SpanUnit.H))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("spanliteral", stringLiteral("1h")))) .build()); // Test span literal with decimal value and minute unit @@ -1652,9 +1634,7 @@ public void testVisitSpanLiteral() { intLiteral(2), SpanUnit.m))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("spanliteral", stringLiteral("2m")))) .build()); // Test span literal without unit (should use NONE unit) @@ -1670,9 +1650,7 @@ public void testVisitSpanLiteral() { intLiteral(10), SpanUnit.NONE))) .aggregationFunction(alias("count()", aggregate("count", allFields()))) - .arguments( - exprList( - argument("limit", intLiteral(10)), argument("useother", booleanLiteral(true)))) + .arguments(exprList(argument("spanliteral", intLiteral(10)))) .build()); // Test span literal with decimal value diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 
45f7611db17..ede8c4e4a5a 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -268,9 +268,12 @@ public void testReverseCommand() { @Test public void testTimechartCommand() { assertEquals( - "source=table | timechart limit=*** useother=*** count() by span(time_identifier, ***" - + " m) identifier", + "source=table | timechart count() by identifier", anonymize("source=t | timechart count() by host")); + + assertEquals( + "source=table | timechart timefield=time_identifier max(identifier)", + anonymize("source=t | timechart timefield=month max(revenue)")); } @Test From 9f3066946d6a39861e5512f20cfb5fcd7b23ee61 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Wed, 26 Nov 2025 16:12:05 +0800 Subject: [PATCH 80/99] Convert `dedup` pushdown to composite + top_hits (#4844) * Enable dedup pushdown Signed-off-by: Lantao Jin * fix doctest Signed-off-by: Lantao Jin * refactor Signed-off-by: Lantao Jin * Disable dedup expr Signed-off-by: Lantao Jin * fix IT Signed-off-by: Lantao Jin * fix yaml test Signed-off-by: Lantao Jin * add more comments in code Signed-off-by: Lantao Jin * fix conflicts Signed-off-by: Lantao Jin * Address comments Signed-off-by: Lantao Jin --------- Signed-off-by: Lantao Jin --- .../sql/calcite/CalciteRelNodeVisitor.java | 68 ++--- .../sql/calcite/utils/PlanUtils.java | 87 +++++- .../opensearch/sql/data/type/ExprType.java | 12 +- docs/user/ppl/interfaces/endpoint.rst | 8 +- .../sql/calcite/remote/CalciteExplainIT.java | 131 +++++++- .../sql/calcite/remote/CalcitePPLDedupIT.java | 115 +++++++ .../org/opensearch/sql/ppl/ExplainIT.java | 24 +- .../org/opensearch/sql/util/MatcherUtils.java | 5 +- .../calcite/explain_dedup_complex1.yaml | 10 + .../calcite/explain_dedup_complex2.yaml | 11 + .../calcite/explain_dedup_complex3.yaml | 10 + .../calcite/explain_dedup_complex4.yaml | 11 + .../calcite/explain_dedup_expr1.yaml | 0 .../calcite/explain_dedup_expr2.yaml | 0 .../calcite/explain_dedup_expr3.yaml | 0 .../calcite/explain_dedup_expr4.yaml | 0 .../explain_dedup_keepempty_false_push.json | 6 - .../explain_dedup_keepempty_false_push.yaml | 11 + ...plain_dedup_keepempty_true_not_pushed.yaml | 12 + .../explain_dedup_keepempty_true_push.json | 6 - .../calcite/explain_dedup_push.json | 6 - .../calcite/explain_dedup_push.yaml | 11 + .../explain_dedup_text_type_no_push.yaml | 13 + ...explain_join_with_criteria_max_option.json | 6 - ...explain_join_with_criteria_max_option.yaml | 28 ++ .../explain_join_with_fields_max_option.json | 6 - .../explain_join_with_fields_max_option.yaml | 29 ++ .../explain_dedup_keepempty_false_push.json | 6 - .../explain_dedup_keepempty_false_push.yaml | 15 + ...plain_dedup_keepempty_true_not_pushed.yaml | 12 + .../explain_dedup_keepempty_true_push.json | 6 - .../explain_dedup_push.json | 6 - .../explain_dedup_push.yaml | 15 + ...explain_join_with_criteria_max_option.json | 6 - .../explain_join_with_fields_max_option.json | 6 - .../explain_dedup_keepempty_false_push.json | 30 -- .../explain_dedup_keepempty_false_push.yaml | 24 ++ ...plain_dedup_keepempty_true_not_pushed.yaml | 24 ++ .../explain_dedup_keepempty_true_push.json | 30 -- .../ppl/explain_dedup_push.json | 30 -- .../ppl/explain_dedup_push.yaml | 24 ++ .../rest-api-spec/test/issues/4563_4664.yml | 4 +- .../data/type/OpenSearchAliasType.java | 3 +- .../value/OpenSearchExprValueFactory.java | 7 +- .../planner/rules/AggregateIndexScanRule.java | 28 +- 
.../planner/rules/DedupPushdownRule.java | 281 ++++++++++++++---- .../ExpandCollationOnProjectExprRule.java | 2 +- .../planner/rules/OpenSearchIndexRules.java | 7 +- .../rules/SortProjectExprTransposeRule.java | 2 +- .../opensearch/request/AggregateAnalyzer.java | 76 ++++- .../request/OpenSearchRequestBuilder.java | 2 + .../response/OpenSearchResponse.java | 2 +- .../response/agg/ArgMaxMinParser.java | 14 +- .../response/agg/BucketAggregationParser.java | 9 +- .../agg/CompositeAggregationParser.java | 25 +- .../opensearch/response/agg/FilterParser.java | 3 +- .../opensearch/response/agg/MetricParser.java | 3 +- .../response/agg/MetricParserHelper.java | 25 +- .../agg/NoBucketAggregationParser.java | 3 +- .../response/agg/PercentilesParser.java | 22 +- .../response/agg/SinglePercentileParser.java | 18 +- .../response/agg/SingleValueParser.java | 11 +- .../opensearch/response/agg/StatsParser.java | 10 +- .../response/agg/TopHitsParser.java | 78 +++-- .../scan/AbstractCalciteIndexScan.java | 14 +- .../storage/scan/CalciteLogicalIndexScan.java | 2 +- .../storage/scan/context/PushDownContext.java | 4 + .../storage/scan/context/PushDownType.java | 3 +- .../dsl/MetricAggregationBuilder.java | 2 +- .../request/AggregateAnalyzerTest.java | 6 +- ...enSearchAggregationResponseParserTest.java | 39 +-- .../scan/CalciteIndexScanCostTest.java | 13 - .../sql/ppl/calcite/CalcitePPLDedupTest.java | 94 ++++++ .../sql/ppl/calcite/CalcitePPLJoinTest.java | 12 +- 74 files changed, 1207 insertions(+), 477 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_push.json delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml delete mode 100644 
integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_push.json delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.json delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.json delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_push.json delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 1e0c994c182..6a556eccc92 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -15,6 +15,7 @@ import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; +import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_MAIN; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_RARE_TOP; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_STREAMSTATS; @@ -48,9 +49,6 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.hint.HintStrategyTable; -import org.apache.calcite.rel.hint.RelHint; -import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFamily; @@ -1054,7 +1052,7 @@ private Pair, List> aggregateWithTrimming( List intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft()); context.relBuilder.aggregate( context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight()); - if (hintBucketNonNull) addIgnoreNullBucketHintToAggregate(context); + if (hintBucketNonNull) PlanUtils.addIgnoreNullBucketHintToAggregate(context.relBuilder); // During aggregation, Calcite projects both input dependencies and output group-by fields. 
// When names conflict, Calcite adds numeric suffixes (e.g., "value0"). // Apply explicit renaming to restore the intended aliases. @@ -1316,7 +1314,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { : duplicatedFieldNames.stream() .map(a -> (RexNode) context.relBuilder.field(a)) .toList(); - buildDedupNotNull(context, dedupeFields, allowedDuplication); + buildDedupNotNull(context, dedupeFields, allowedDuplication, true); } context.relBuilder.join( JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); @@ -1372,7 +1370,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) { List dedupeFields = getRightColumnsInJoinCriteria(context.relBuilder, joinCondition); - buildDedupNotNull(context, dedupeFields, allowedDuplication); + buildDedupNotNull(context, dedupeFields, allowedDuplication, true); } context.relBuilder.join( JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition); @@ -1537,7 +1535,7 @@ public RelNode visitDedupe(Dedupe node, CalcitePlanContext context) { if (keepEmpty) { buildDedupOrNull(context, dedupeFields, allowedDuplication); } else { - buildDedupNotNull(context, dedupeFields, allowedDuplication); + buildDedupNotNull(context, dedupeFields, allowedDuplication, false); } return context.relBuilder.peek(); } @@ -1545,16 +1543,12 @@ public RelNode visitDedupe(Dedupe node, CalcitePlanContext context) { private static void buildDedupOrNull( CalcitePlanContext context, List dedupeFields, Integer allowedDuplication) { /* - * | dedup 2 a, b keepempty=false - * DropColumns('_row_number_dedup_) - * +- Filter ('_row_number_dedup_ <= n OR isnull('a) OR isnull('b)) - * +- Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a ASC NULLS FIRST, 'b ASC NULLS FIRST] + * | dedup 2 a, b keepempty=true + * LogicalProject(...) + * +- LogicalFilter(condition=[OR(IS NULL(a), IS NULL(b), <=(_row_number_dedup_, n))]) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) * +- ... */ - // Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, - // specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a - // ASC - // NULLS FIRST, 'b ASC NULLS FIRST] RexNode rowNumber = context .relBuilder @@ -1577,16 +1571,21 @@ private static void buildDedupOrNull( } private static void buildDedupNotNull( - CalcitePlanContext context, List dedupeFields, Integer allowedDuplication) { + CalcitePlanContext context, + List dedupeFields, + Integer allowedDuplication, + boolean fromJoinMaxOption) { /* * | dedup 2 a, b keepempty=false - * DropColumns('_row_number_dedup_) - * +- Filter ('_row_number_dedup_ <= n) - * +- Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, specifiedwindowoundedpreceding$(), currentrow$())) AS _row_number_dedup_], ['a, 'b], ['a ASC NULLS FIRST, 'b ASC NULLS FIRST] - * +- Filter (isnotnull('a) AND isnotnull('b)) - * +- ... + * LogicalProject(...) + * +- LogicalFilter(condition=[<=(_row_number_dedup_, n)]) + * +- LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY a, b)]) + * +- LogicalFilter(condition=[AND(IS NOT NULL(a), IS NOT NULL(b))]) + * +- ... */ // Filter (isnotnull('a) AND isnotnull('b)) + String rowNumberAlias = + fromJoinMaxOption ?
ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP : ROW_NUMBER_COLUMN_FOR_DEDUP; context.relBuilder.filter( context.relBuilder.and(dedupeFields.stream().map(context.relBuilder::isNotNull).toList())); // Window [row_number() windowspecdefinition('a, 'b, 'a ASC NULLS FIRST, 'b ASC NULLS FIRST, @@ -1600,15 +1599,15 @@ private static void buildDedupNotNull( .partitionBy(dedupeFields) .orderBy(dedupeFields) .rowsTo(RexWindowBounds.CURRENT_ROW) - .as(ROW_NUMBER_COLUMN_FOR_DEDUP); + .as(rowNumberAlias); context.relBuilder.projectPlus(rowNumber); - RexNode _row_number_dedup_ = context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_DEDUP); + RexNode rowNumberField = context.relBuilder.field(rowNumberAlias); // Filter ('_row_number_dedup_ <= n) context.relBuilder.filter( context.relBuilder.lessThanOrEqual( - _row_number_dedup_, context.relBuilder.literal(allowedDuplication))); + rowNumberField, context.relBuilder.literal(allowedDuplication))); // DropColumns('_row_number_dedup_) - context.relBuilder.projectExcept(_row_number_dedup_); + context.relBuilder.projectExcept(rowNumberField); } @Override @@ -2395,25 +2394,6 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) { return context.relBuilder.peek(); } - private static void addIgnoreNullBucketHintToAggregate(CalcitePlanContext context) { - final RelHint statHits = - RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); - assert context.relBuilder.peek() instanceof LogicalAggregate - : "Stats hits should be added to LogicalAggregate"; - context.relBuilder.hints(statHits); - context - .relBuilder - .getCluster() - .setHintStrategies( - HintStrategyTable.builder() - .hintStrategy( - "stats_args", - (hint, rel) -> { - return rel instanceof LogicalAggregate; - }) - .build()); - } - @Override public RelNode visitTableFunction(TableFunction node, CalcitePlanContext context) { throw new CalciteUnsupportedException("Table function is unsupported in Calcite"); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index cd9abcf32ba..50e03fc608f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -25,15 +25,21 @@ import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelShuttle; +import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.hint.HintStrategyTable; +import org.apache.calcite.rel.hint.RelHint; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexVisitorImpl; @@ -45,8 +51,11 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.Pair; import org.apache.calcite.util.Util; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.Mappings; import 
org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.IntervalUnit; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.expression.WindowBound; @@ -62,6 +71,7 @@ public interface PlanUtils { /** this is only for dedup command, do not reuse it in other command */ String ROW_NUMBER_COLUMN_FOR_DEDUP = "_row_number_dedup_"; + String ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP = "_row_number_join_max_dedup_"; String ROW_NUMBER_COLUMN_FOR_RARE_TOP = "_row_number_rare_top_"; String ROW_NUMBER_COLUMN_FOR_MAIN = "_row_number_main_"; String ROW_NUMBER_COLUMN_FOR_SUBSEARCH = "_row_number_subsearch_"; @@ -449,18 +459,15 @@ static RexNode derefMapCall(RexNode rexNode) { return rexNode; } - /** Check if contains RexOver introduced by dedup */ - static boolean containsRowNumberDedup(LogicalProject project) { - return project.getProjects().stream() - .anyMatch(p -> p instanceof RexOver && p.getKind() == SqlKind.ROW_NUMBER) - && project.getRowType().getFieldNames().contains(ROW_NUMBER_COLUMN_FOR_DEDUP); + /** Check if contains dedup */ + static boolean containsRowNumberDedup(RelNode node) { + return node.getRowType().getFieldNames().stream().anyMatch(ROW_NUMBER_COLUMN_FOR_DEDUP::equals); } - /** Check if contains RexOver introduced by dedup top/rare */ - static boolean containsRowNumberRareTop(LogicalProject project) { - return project.getProjects().stream() - .anyMatch(p -> p instanceof RexOver && p.getKind() == SqlKind.ROW_NUMBER) - && project.getRowType().getFieldNames().contains(ROW_NUMBER_COLUMN_FOR_RARE_TOP); + /** Check if contains dedup for top/rare */ + static boolean containsRowNumberRareTop(RelNode node) { + return node.getRowType().getFieldNames().stream() + .anyMatch(ROW_NUMBER_COLUMN_FOR_RARE_TOP::equals); } /** Get all RexWindow list from LogicalProject */ @@ -508,10 +515,6 @@ static boolean distinctProjectList(LogicalProject project) { return project.getNamedProjects().stream().allMatch(rexSet::add); } - static boolean containsRexOver(LogicalProject project) { - return project.getProjects().stream().anyMatch(RexOver::containsOver); - } - /** * The LogicalSort is a LIMIT that should be pushed down when its fetch field is not null and its * collation is empty. For example: sort name | head 5 should not be pushed down @@ -524,7 +527,7 @@ static boolean isLogicalSortLimit(LogicalSort sort) { return sort.fetch != null; } - static boolean projectContainsExpr(Project project) { + static boolean containsRexCall(Project project) { return project.getProjects().stream().anyMatch(p -> p instanceof RexCall); } @@ -595,4 +598,58 @@ static void replaceTop(RelBuilder relBuilder, RelNode relNode) { throw new IllegalStateException("Unable to invoke RelBuilder.replaceTop", e); } } + + static void addIgnoreNullBucketHintToAggregate(RelBuilder relBuilder) { + final RelHint statHits = + RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build(); + assert relBuilder.peek() instanceof LogicalAggregate + : "Stats hits should be added to LogicalAggregate"; + relBuilder.hints(statHits); + relBuilder + .getCluster() + .setHintStrategies( + HintStrategyTable.builder() + .hintStrategy( + "stats_args", + (hint, rel) -> { + return rel instanceof LogicalAggregate; + }) + .build()); + } + + /** Extract the RexLiteral from the aggregate call if the aggregate call is a LITERAL_AGG. 
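+ * For example, the {@code LITERAL_AGG(1)} call that appears in the pushed-down dedup plans carries the constant {@code 1}; for any other aggregate kind this method returns null.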
*/ + static @Nullable RexLiteral getObjectFromLiteralAgg(AggregateCall aggCall) { + if (aggCall.getAggregation().kind == SqlKind.LITERAL_AGG) { + return (RexLiteral) + aggCall.rexList.stream().filter(rex -> rex instanceof RexLiteral).findAny().orElse(null); + } else { + return null; + } + } + + /** + * This is a helper method to create a target mapping easily for replacing calling {@link + * Mappings#target(List, int)} + * + * @param rexNodes the rex list in schema + * @param schema the schema which contains the rex list + * @return the target mapping + */ + static Mapping mapping(List rexNodes, RelDataType schema) { + return Mappings.target(getSelectColumns(rexNodes), schema.getFieldCount()); + } + + static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) { + RexNode condition = filter.getCondition(); + return isNotNullOnRef(condition) + || (condition instanceof RexCall rexCall + && rexCall.getOperator().equals(SqlStdOperatorTable.AND) + && rexCall.getOperands().stream().allMatch(PlanUtils::isNotNullOnRef)); + } + + private static boolean isNotNullOnRef(RexNode rex) { + return rex instanceof RexCall rexCall + && rexCall.isA(SqlKind.IS_NOT_NULL) + && rexCall.getOperands().get(0) instanceof RexInputRef; + } } diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java index 9234c0a2352..3ec5f954984 100644 --- a/core/src/main/java/org/opensearch/sql/data/type/ExprType.java +++ b/core/src/main/java/org/opensearch/sql/data/type/ExprType.java @@ -62,10 +62,18 @@ default Optional getOriginalPath() { } /** - * Get the original path. Types like alias type should be derived from the type of the original - * field. + * Get the original expr path. Types like alias type should be derived from the type of the + * original field. */ default ExprType getOriginalExprType() { return this; } + + /** + * Get the original data type. Types like alias type should be derived from the type of the + * original field. + */ + default ExprType getOriginalType() { + return this; + } } diff --git a/docs/user/ppl/interfaces/endpoint.rst b/docs/user/ppl/interfaces/endpoint.rst index b4acc21d8f4..08032ad6cda 100644 --- a/docs/user/ppl/interfaces/endpoint.rst +++ b/docs/user/ppl/interfaces/endpoint.rst @@ -117,12 +117,12 @@ Explain query:: sh$ curl -sS -H 'Content-Type: application/json' \ ... -X POST localhost:9200/_plugins/_ppl/_explain?format=extended \ - ... -d '{"query" : "source=state_country | where age>30 | dedup age"}' + ... 
-d '{"query" : "source=state_country | where age>30"}' { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[<=($12, 1)])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5], _id=[$6], _index=[$7], _score=[$8], _maxscore=[$9], _sort=[$10], _routing=[$11], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $5 ORDER BY $5)])\n LogicalFilter(condition=[IS NOT NULL($5)])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..6=[{inputs}], expr#7=[1], expr#8=[<=($t6, $t7)], proj#0..5=[{exprs}], $condition=[$t8])\n EnumerableWindow(window#0=[window(partition {5} order by [5] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n", - "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Object[] v0, Object[] v1) {\n final int c;\n c = org.apache.calcite.runtime.Utilities.compareNullsLast((Long) v0[5], (Long) v1[5]);\n if (c != 0) {\n return c;\n }\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Object[]) o0, (Object[]) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n v1stashed.scan().foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Object[] v) {\n Long key = (Long) v[5];\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Object[]) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; (++i)) {\n final Object[] row = (Object[]) _rows[i];\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 
0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n row[0],\n row[1],\n row[2],\n row[3],\n row[4],\n row[5],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[6]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n final Object[] current = (Object[]) inputEnumerator.current();\n final Object input_value = current[0];\n final Object input_value0 = current[1];\n final Object input_value1 = current[2];\n final Object input_value2 = current[3];\n final Object input_value3 = current[4];\n final Object input_value4 = current[5];\n return new Object[] {\n input_value,\n input_value0,\n input_value1,\n input_value2,\n input_value3,\n input_value4};\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n return child.take(10000);\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n", + "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n return v1stashed.scan();\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n" } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 3814da35540..d0cac82b23f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -82,25 +82,29 @@ public void supportSearchSargPushDown_timeRange() throws IOException { } // Only for Calcite - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testJoinWithCriteriaAndMaxOption() throws IOException { + // TODO could be optimized with https://github.com/opensearch-project/OpenSearch/issues/3725 + enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_bank | join max=1 left=l right=r on" + " l.account_number=r.account_number opensearch-sql_test_index_bank"; - var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_join_with_criteria_max_option.json"); - assertJsonEqualsIgnoreId(expected, result); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_join_with_criteria_max_option.yaml"); + assertYamlEqualsIgnoreId(expected, result); } // Only for Calcite - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testJoinWithFieldListAndMaxOption() throws IOException { + // TODO could be optimized with https://github.com/opensearch-project/OpenSearch/issues/3725 + enabledOnlyWhenPushdownIsEnabled(); String query = "source=opensearch-sql_test_index_bank | join type=inner max=1 account_number" + " opensearch-sql_test_index_bank"; - var result = explainQueryToString(query); - String expected = loadExpectedPlan("explain_join_with_fields_max_option.json"); - assertJsonEqualsIgnoreId(expected, result); + var result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_join_with_fields_max_option.yaml"); + assertYamlEqualsIgnoreId(expected, result); } // Only for Calcite @@ -1838,4 +1842,115 @@ public void testInternalItemAccessOnStructs() throws IOException { + " info.dummy_sub_field", TEST_INDEX_WEBLOGS))); } + + @Test + public void testComplexDedup() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_complex1.yaml"); + assertYamlEqualsIgnoreId( + expected, explainQueryYaml("source=opensearch-sql_test_index_account | dedup 1 gender")); + expected = loadExpectedPlan("explain_dedup_complex2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " dedup 1 gender, state")); + expected = loadExpectedPlan("explain_dedup_complex3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml("source=opensearch-sql_test_index_account | dedup 2 gender, state")); + expected = loadExpectedPlan("explain_dedup_complex4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " dedup 2 gender, state")); + } + + @Ignore("https://github.com/opensearch-project/sql/issues/4789") + public void testDedupExpr() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_expr1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | dedup 1" + + " new_gender")); + expected = loadExpectedPlan("explain_dedup_expr2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval new_gender = lower(gender), new_state = 
lower(state) | dedup 1 new_gender," + + " new_state")); + expected = loadExpectedPlan("explain_dedup_expr3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | eval" + + " new_state = lower(state) | dedup 2 new_gender, new_state")); + expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval new_gender = lower(gender) | eval new_state = lower(state) | sort gender," + + " -state | dedup 2 new_gender, new_state")); + } + + @Ignore("https://github.com/opensearch-project/sql/issues/4789") + public void testDedupRename() throws IOException { + // rename changes nothing, reuse the same yaml files of testDedupExpr() + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_expr1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval tmp_gender = lower(gender) | rename" + + " tmp_gender as new_gender | dedup 1 new_gender")); + expected = loadExpectedPlan("explain_dedup_expr2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval tmp_gender = lower(gender), tmp_state = lower(state) | rename tmp_gender" + + " as new_gender | rename tmp_state as new_state | dedup 1 new_gender," + + " new_state")); + expected = loadExpectedPlan("explain_dedup_expr3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | eval tmp_gender = lower(gender) | eval" + + " tmp_state = lower(state) | rename tmp_gender as new_gender | rename tmp_state" + + " as new_state | dedup 2 new_gender, new_state")); + expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" + + " tmp_gender as new_gender | rename tmp_state as new_state | sort gender," + + " -state | dedup 2 new_gender, new_state")); + } + + @Ignore("SortExprIndexScanRule not work?") + public void testDedupRename2() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_expr4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | fields account_number, gender, age, state |" + + " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename" + + " tmp_gender as new_gender | rename tmp_state as new_state | sort new_gender," + + " -new_state | dedup 2 new_gender, new_state")); + } + + @Test + public void testDedupTextTypeNotPushdown() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_dedup_text_type_no_push.yaml"); + assertYamlEqualsIgnoreId( + expected, explainQueryYaml(String.format("source=%s | dedup email", TEST_INDEX_BANK))); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java index fc9604d62fd..3a2e6d69f3b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDedupIT.java @@ -248,4 +248,119 @@ public void testReorderDedupFieldsShouldNotAffectResult() throws IOException { schema("category", null, "string"), schema("id", null, "int")); } + + @Test + public void testDedupComplex() throws IOException { + JSONObject actual = + executeQuery(String.format("source=%s | dedup 1 name", TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "A", 1), + rows("Z", "B", 1), + rows("X", "C", 1), + rows("Z", "D", 1), + rows(null, "E", 1)); + actual = + executeQuery( + String.format( + "source=%s | fields category, name | dedup 1 name", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, rows("X", "A"), rows("Z", "B"), rows("X", "C"), rows("Z", "D"), rows(null, "E")); + actual = + executeQuery( + String.format("source=%s | dedup 1 name, category", TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "A", 1), + rows("Y", "A", 1), + rows("Y", "B", 1), + rows("Z", "B", 1), + rows("X", "C", 1), + rows("Z", "D", 1)); + actual = + executeQuery( + String.format( + "source=%s | fields category, id, name | dedup 2 name, category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", 1, "A"), + rows("X", 1, "A"), + rows("Y", 1, "A"), + rows("Y", 1, "A"), + rows("Y", 1, "B"), + rows("Z", 1, "B"), + rows("Z", 1, "B"), + rows("X", 1, "C"), + rows("X", 1, "C"), + rows("Z", 1, "D")); + } + + @Test + public void testDedupExpr() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval new_name = lower(name) | dedup 1 new_name", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "A", 1, "a"), + rows("Z", "B", 1, "b"), + rows("X", "C", 1, "c"), + rows("Z", "D", 1, "d"), + rows(null, "E", 1, "e")); + actual = + executeQuery( + String.format( + "source=%s | fields category, name, id | eval new_name = lower(name), new_category" + + " = lower(category) | dedup 1 new_name, new_category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", "C", 1, "c", "x"), + rows("Z", "D", 1, "d", "z"), + rows("X", "A", 1, "a", "x"), + rows("Y", "B", 1, "b", "y"), + rows("Y", "A", 1, "a", "y"), + rows("Z", "B", 1, "b", "z")); + actual = + executeQuery( + String.format( + "source=%s | eval new_name = lower(name), new_category = lower(category) | dedup 2" + + " name, category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("Y", "A", 1, "a", "y"), + rows("Y", "A", 1, "a", "y"), + rows("Z", "B", 1, "b", "z"), + rows("Z", "B", 1, "b", "z"), + rows("X", "A", 1, "a", "x"), + rows("X", "A", 1, "a", "x"), + rows("Y", "B", 1, "b", "y"), + rows("Z", "D", 1, "d", "z"), + rows("X", "C", 1, "c", "x"), + rows("X", "C", 1, "c", "x")); + actual = + executeQuery( + String.format( + "source=%s | fields category, id, name | eval new_name = lower(name) | eval" + + " new_category = lower(category) | sort name, -category | dedup 2 new_name," + + " new_category", + TEST_INDEX_DUPLICATION_NULLABLE)); + verifyDataRows( + actual, + rows("X", 1, "C", "c", "x"), + rows("X", 1, "C", "c", "x"), + rows("Z", 1, "D", "d", "z"), + rows("X", 1, "A", "a", "x"), + rows("X", 1, "A", "a", "x"), + rows("Y", 1, "B", "b", "y"), + rows("Y", 1, "A", "a", "y"), + rows("Y", 1, "A", "a", "y"), + rows("Z", 1, "B", "b", "z"), + rows("Z", 1, "B", "b", "z")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java 
b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 9c34586e06d..42a9ddfb98a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -485,32 +485,32 @@ public void testStatsByTimeSpan() throws IOException { TEST_INDEX_BANK))); } - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testDedupPushdown() throws IOException { - String expected = loadExpectedPlan("explain_dedup_push.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_dedup_push.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age" + " | dedup 1 gender")); } @Test - public void testDedupKeepEmptyTruePushdown() throws IOException { - String expected = loadExpectedPlan("explain_dedup_keepempty_true_push.json"); - assertJsonEqualsIgnoreId( + public void testDedupKeepEmptyTrueNotPushed() throws IOException { + String expected = loadExpectedPlan("explain_dedup_keepempty_true_not_pushed.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age" + " | dedup gender KEEPEMPTY=true")); } - @Ignore("https://github.com/opensearch-project/OpenSearch/issues/3725") + @Test public void testDedupKeepEmptyFalsePushdown() throws IOException { - String expected = loadExpectedPlan("explain_dedup_keepempty_false_push.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_dedup_keepempty_false_push.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_account | fields account_number, gender, age" + " | dedup gender KEEPEMPTY=false")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index fb5c0be026b..b7e030e1295 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -425,7 +425,9 @@ private static String eliminateTimeStamp(String s) { } private static String eliminateRelId(String s) { - return s.replaceAll("rel#\\d+", "rel#").replaceAll("RelSubset#\\d+", "RelSubset#"); + return s.replaceAll("rel#\\d+", "rel#") + .replaceAll("RelSubset#\\d+", "RelSubset#") + .replaceAll("LogicalProject#\\d+", "LogicalProject#"); } private static String eliminatePid(String s) { @@ -452,6 +454,7 @@ private static String cleanUpYaml(String s) { return s.replaceAll("\"utcTimestamp\":\\d+", "\"utcTimestamp\": 0") .replaceAll("rel#\\d+", "rel#") .replaceAll("RelSubset#\\d+", "RelSubset#") + .replaceAll("LogicalProject#\\d+", "LogicalProject#") .replaceAll("pitId=[^,]+,", "pitId=*,"); } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml new file mode 100644 index 00000000000..1b4c379f9b6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + 
LogicalFilter(condition=[<=($17, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + LogicalFilter(condition=[IS NOT NULL($4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml new file mode 100644 index 00000000000..6d5c76b1443 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3]) + LogicalFilter(condition=[<=($4, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1, $3)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml new file mode 100644 index 00000000000..af72f304286 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) + LogicalFilter(condition=[<=($17, 2)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $7 ORDER BY $4, $7)]) + LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml new file mode 100644 index 00000000000..a2385ccdbb2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3]) + LogicalFilter(condition=[<=($4, 2)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3 ORDER BY $1, $3)]) + LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr1.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr2.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr3.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_expr4.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.json deleted file mode 100644 index d662486df85..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], FILTER->IS NOT NULL($1), COLLAPSE->gender, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"gender\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]},\"collapse\":{\"field\":\"gender.keyword\"}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml new file mode 100644 index 00000000000..c16dde54e22 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], 
gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml new file mode 100644 index 00000000000..978cedd813f --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_not_pushed.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], expr#5=[1], expr#6=[<=($t3, $t5)], expr#7=[OR($t4, $t6)], proj#0..2=[{exprs}], $condition=[$t7]) + EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_push.json deleted file mode 100644 index 92b6103864f..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_true_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)])\n CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t1)], expr#5=[1], expr#6=[<=($t3, $t5)], expr#7=[OR($t4, $t6)], proj#0..2=[{exprs}], $condition=[$t7])\n EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.json deleted file mode 100644 index d662486df85..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], FILTER->IS NOT NULL($1), COLLAPSE->gender, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"gender\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]},\"collapse\":{\"field\":\"gender.keyword\"}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml new file mode 100644 index 00000000000..c16dde54e22 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml new file mode 100644 index 00000000000..451fc8ff5d7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_text_type_no_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($19, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $11 ORDER BY $11)]) + LogicalFilter(condition=[IS NOT NULL($11)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) + EnumerableWindow(window#0=[window(partition {11} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($11)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"email","boost":1.0}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.json deleted file mode 100644 index 08db116a7c9..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], r.account_number=[$13], r.firstname=[$14], r.address=[$15], r.birthdate=[$16], r.gender=[$17], r.city=[$18], r.lastname=[$19], r.balance=[$20], 
r.employer=[$21], r.state=[$22], r.age=[$23], r.email=[$24], r.male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($0), COLLAPSE->account_number, SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"account_number\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}],\"collapse\":{\"field\":\"account_number\"}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml new file mode 100644 index 00000000000..7b4e5516c04 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_criteria_max_option.yaml @@ -0,0 +1,28 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], 
address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], r.account_number=[$13], r.firstname=[$14], r.address=[$15], r.birthdate=[$16], r.gender=[$17], r.city=[$18], r.lastname=[$19], r.balance=[$20], r.employer=[$21], r.state=[$22], r.age=[$23], r.email=[$24], r.male=[$25]) + LogicalJoin(condition=[=($0, $13)], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], SORT->[{ + "account_number" : { + "order" : "asc", + "missing" : "_last" + } + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) + EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NOT NULL($t0)], proj#0..12=[{exprs}], $condition=[$t13]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.json 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.json deleted file mode 100644 index c1ee2aa0b30..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$13], firstname=[$14], address=[$15], birthdate=[$16], gender=[$17], city=[$18], lastname=[$19], balance=[$20], employer=[$21], state=[$22], age=[$23], email=[$24], male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableCalc(expr#0..13=[{inputs}], proj#0..12=[{exprs}])\n EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->IS NOT NULL($0), COLLAPSE->account_number, SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"exists\":{\"field\":\"account_number\",\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}],\"collapse\":{\"field\":\"account_number\"}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number], SORT->[{\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\"],\"excludes\":[]},\"sort\":[{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml 
new file mode 100644 index 00000000000..71ab58f77d3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_join_with_fields_max_option.yaml @@ -0,0 +1,29 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$13], firstname=[$14], address=[$15], birthdate=[$16], gender=[$17], city=[$18], lastname=[$19], balance=[$20], employer=[$21], state=[$22], age=[$23], email=[$24], male=[$25]) + LogicalJoin(condition=[=($0, $13)], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + LogicalFilter(condition=[<=($13, 1)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..13=[{inputs}], account_number=[$t1], firstname=[$t2], address=[$t3], birthdate=[$t4], gender=[$t5], city=[$t6], lastname=[$t7], balance=[$t8], employer=[$t9], state=[$t10], age=[$t11], email=[$t12], male=[$t13]) + EnumerableLimit(fetch=[10000]) + EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number], SORT->[{ + "account_number" : { + "order" : "asc", + "missing" : "_last" + } + }]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number"],"excludes":[]},"sort":[{"account_number":{"order":"asc","missing":"_last"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + EnumerableSort(sort0=[$0], dir0=[ASC]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[1], expr#15=[<=($t13, $t14)], proj#0..12=[{exprs}], $condition=[$t15]) + EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NOT NULL($t0)], proj#0..12=[{exprs}], $condition=[$t13]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":50000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=50000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.json deleted file mode 100644 index 625dc968ab4..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19])\n EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml new file mode 100644 index 00000000000..2706c009421 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_false_push.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19]) + EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml new file mode 100644 index 00000000000..ee74cf48e4a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_not_pushed.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[OR(IS NULL($1), 
<=($3, 1))]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NULL($t4)], expr#19=[1], expr#20=[<=($t17, $t19)], expr#21=[OR($t18, $t20)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t21]) + EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_push.json deleted file mode 100644 index d1592e9fa89..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_keepempty_true_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[OR(IS NULL($1), <=($3, 1))])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NULL($t4)], expr#19=[1], expr#20=[<=($t17, $t19)], expr#21=[OR($t18, $t20)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t21])\n EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.json deleted file mode 100644 index 625dc968ab4..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)])\n LogicalFilter(condition=[IS NOT NULL($1)])\n LogicalProject(account_number=[$0], gender=[$4], age=[$8])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19])\n EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" - } -} diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml new file mode 100644 index 00000000000..2706c009421 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_dedup_push.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2]) + LogicalFilter(condition=[<=($3, 1)]) + LogicalProject(account_number=[$0], gender=[$1], age=[$2], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1 ORDER BY $1)]) + LogicalFilter(condition=[IS NOT NULL($1)]) + LogicalProject(account_number=[$0], gender=[$4], age=[$8]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..17=[{inputs}], expr#18=[1], expr#19=[<=($t17, $t18)], account_number=[$t0], gender=[$t4], age=[$t8], $condition=[$t19]) + EnumerableWindow(window#0=[window(partition {4} order by [4] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t4)], proj#0..16=[{exprs}], $condition=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.json deleted file mode 100644 index 11ca44cdea2..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_criteria_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], r.account_number=[$13], r.firstname=[$14], r.address=[$15], r.birthdate=[$16], r.gender=[$17], r.city=[$18], r.lastname=[$19], r.balance=[$20], r.employer=[$21], r.state=[$22], r.age=[$23], r.email=[$24], r.male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": 
"EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $13)], joinType=[inner])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..18=[{inputs}], proj#0..12=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21])\n EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t0)], proj#0..18=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.json deleted file mode 100644 index a2b931bba32..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_join_with_fields_max_option.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$13], firstname=[$14], address=[$15], birthdate=[$16], gender=[$17], city=[$18], lastname=[$19], balance=[$20], employer=[$21], state=[$22], age=[$23], email=[$24], male=[$25])\n LogicalJoin(condition=[=($0, $13)], joinType=[inner])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n LogicalFilter(condition=[<=($13, 1)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableCalc(expr#0..13=[{inputs}], account_number=[$t1], firstname=[$t2], address=[$t3], birthdate=[$t4], gender=[$t5], city=[$t6], lastname=[$t7], balance=[$t8], employer=[$t9], state=[$t10], age=[$t11], email=[$t12], male=[$t13])\n EnumerableLimit(fetch=[10000])\n EnumerableMergeJoin(condition=[=($0, $1)], joinType=[inner])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..18=[{inputs}], account_number=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n EnumerableSort(sort0=[$0], dir0=[ASC])\n EnumerableCalc(expr#0..19=[{inputs}], expr#20=[1], expr#21=[<=($t19, $t20)], proj#0..12=[{exprs}], $condition=[$t21])\n EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n 
EnumerableCalc(expr#0..18=[{inputs}], expr#19=[IS NOT NULL($t0)], proj#0..18=[{exprs}], $condition=[$t19])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.json deleted file mode 100644 index 4f85572e388..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "DedupeOperator", - "description": { - "dedupeList": "[gender]", - "allowedDuplication": 1, - "keepEmpty": false, - "consecutive": false - }, - "children": [{ - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - }] - }] - } -} \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml new file mode 100644 index 00000000000..7ff290348e9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_false_push.yaml @@ -0,0 +1,24 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: DedupeOperator + description: + dedupeList: "[gender]" + allowedDuplication: 1 + keepEmpty: false + consecutive: false + children: + - name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ + excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml new file mode 100644 index 00000000000..3a711878435 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_not_pushed.yaml @@ -0,0 +1,24 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: DedupeOperator + description: + dedupeList: "[gender]" + allowedDuplication: 1 + keepEmpty: true + consecutive: false + children: + - name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ + 
_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ + excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_push.json deleted file mode 100644 index 46fa0793af9..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_keepempty_true_push.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "DedupeOperator", - "description": { - "dedupeList": "[gender]", - "allowedDuplication": 1, - "keepEmpty": true, - "consecutive": false - }, - "children": [{ - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - }] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.json deleted file mode 100644 index e7728735ee0..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "DedupeOperator", - "description": { - "dedupeList": "[gender]", - "allowedDuplication": 1, - "keepEmpty": false, - "consecutive": false - }, - "children": [{ - "name": "ProjectOperator", - "description": { - "fields": "[account_number, gender, age]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - }] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml new file mode 100644 index 00000000000..7ff290348e9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_dedup_push.yaml @@ -0,0 +1,24 @@ +root: + name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: DedupeOperator + description: + dedupeList: "[gender]" + allowedDuplication: 1 + keepEmpty: false + consecutive: false + children: + - name: ProjectOperator + description: + fields: "[account_number, gender, age]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ + _source\":{\"includes\":[\"account_number\",\"gender\",\"age\"],\"\ + excludes\":[]}}, 
needClean=true, searchDone=false, pitId=*,\ + \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml index b5aa11876bb..f5ecf4eb1cf 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4563_4664.yml @@ -43,11 +43,11 @@ teardown: Content-Type: 'application/json' ppl: body: - query: source=test | rename status as http_status | dedup http_status | fields http_status + query: source=test | rename status as http_status | dedup http_status | fields http_status | sort http_status - match: { total: 3 } - match: { schema: [{"name": "http_status", "type": "string"}] } - - match: { datarows: [["200"], ["500"], ["404"]] } + - match: { datarows: [["200"], ["404"], ["500"]] } --- "4664: Test rename then filter": diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java index eedd2b1eef0..1954134c11b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchAliasType.java @@ -8,6 +8,7 @@ import java.util.List; import java.util.Optional; import java.util.Set; +import lombok.Getter; import org.opensearch.sql.data.type.ExprType; /** @@ -21,7 +22,7 @@ public class OpenSearchAliasType extends OpenSearchDataType { public static final Set objectFieldTypes = Set.of(MappingType.Object, MappingType.Nested); private final String path; - private final OpenSearchDataType originalType; + @Getter private final OpenSearchDataType originalType; public OpenSearchAliasType(String path, OpenSearchDataType type) { super(type.getExprCoreType()); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 2aca2ad334e..fc5610d73f0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -259,7 +259,7 @@ private ExprValue parseContent(Content content) { * value. For example, {"empty_field": []}. 
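   * Note: the type lookup below maps the result through {@code ExprType::getOriginalType}, so an
   * alias field resolves to the type of the field it points at (see {@code OpenSearchAliasType}).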
*/ private Optional type(String field) { - return Optional.ofNullable(typeMapping.get(field)); + return Optional.ofNullable(typeMapping.get(field)).map(ExprType::getOriginalType); } /** @@ -316,6 +316,11 @@ private static ExprValue parseDateTimeString(String value, OpenSearchDateType da } private static ExprValue createOpenSearchDateType(Content value, ExprType type) { + return createOpenSearchDateType(value, type, false); + } + + private static ExprValue createOpenSearchDateType( + Content value, ExprType type, Boolean supportArrays) { OpenSearchDateType dt = (OpenSearchDateType) type; ExprCoreType returnFormat = dt.getExprCoreType(); if (value.isNumber()) { // isNumber diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java index d87611f213b..0f6c654df79 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java @@ -164,7 +164,7 @@ public interface Config extends OpenSearchRuleConfig { // 1. No RexOver and no duplicate projection // 2. Contains width_bucket function on date field referring // to bin command with parameter bins - Predicate.not(PlanUtils::containsRexOver) + Predicate.not(LogicalProject::containsOver) .and(PlanUtils::distinctProjectList) .or(Config::containsWidthBucketFuncOnDate)) .oneInput( @@ -225,7 +225,7 @@ public interface Config extends OpenSearchRuleConfig { .oneInput( b1 -> b1.operand(LogicalFilter.class) - .predicate(Config::mayBeFilterFromBucketNonNull) + .predicate(PlanUtils::mayBeFilterFromBucketNonNull) .oneInput( b2 -> b2.operand(LogicalProject.class) @@ -236,7 +236,7 @@ public interface Config extends OpenSearchRuleConfig { // 2. 
Contains width_bucket function on date // field referring // to bin command with parameter bins - Predicate.not(PlanUtils::containsRexOver) + Predicate.not(LogicalProject::containsOver) .and(PlanUtils::distinctProjectList) .or(Config::containsWidthBucketFuncOnDate)) .oneInput( @@ -263,18 +263,19 @@ public interface Config extends OpenSearchRuleConfig { b1 -> b1.operand(LogicalProject.class) .predicate( - Predicate.not(PlanUtils::containsRexOver) + Predicate.not(LogicalProject::containsOver) .and(PlanUtils::distinctProjectList)) .oneInput( b2 -> b2.operand(LogicalFilter.class) - .predicate(Config::mayBeFilterFromBucketNonNull) + .predicate(PlanUtils::mayBeFilterFromBucketNonNull) .oneInput( b3 -> b3.operand(LogicalProject.class) .predicate( Predicate.not( - PlanUtils::containsRexOver) + LogicalProject + ::containsOver) .and( PlanUtils ::distinctProjectList) @@ -300,21 +301,6 @@ default AggregateIndexScanRule toRule() { return new AggregateIndexScanRule(this); } - static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) { - RexNode condition = filter.getCondition(); - return isNotNullOnRef(condition) - || (condition instanceof RexCall rexCall - && rexCall.getOperator().equals(SqlStdOperatorTable.AND) - && rexCall.getOperands().stream() - .allMatch(AggregateIndexScanRule.Config::isNotNullOnRef)); - } - - private static boolean isNotNullOnRef(RexNode rex) { - return rex instanceof RexCall rexCall - && rexCall.isA(SqlKind.IS_NOT_NULL) - && rexCall.getOperands().get(0) instanceof RexInputRef; - } - static boolean containsWidthBucketFuncOnDate(LogicalProject project) { return project.getProjects().stream() .anyMatch( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index bdace1bd0a3..068900d3f18 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -5,20 +5,23 @@ package org.opensearch.sql.opensearch.planner.rules; -import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; - +import java.util.ArrayList; import java.util.List; import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelRule; +import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindow; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.mapping.Mapping; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.immutables.value.Value; @@ -37,106 +40,260 @@ protected DedupPushdownRule(Config config) { @Override public void onMatch(RelOptRuleCall call) { - final LogicalProject finalOutput = call.rel(0); - // TODO Used when number of duplication is more than 1 + final LogicalProject finalProject = call.rel(0); final LogicalFilter numOfDedupFilter = call.rel(1); final LogicalProject projectWithWindow = call.rel(2); - final CalciteLogicalIndexScan scan = 
call.rel(3);
-    List<RexWindow> windows = PlanUtils.getRexWindowFromProject(projectWithWindow);
-    if (windows.isEmpty() || windows.stream().anyMatch(w -> w.partitionKeys.size() > 1)) {
-      // TODO leverage inner_hits for multiple partition keys
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Cannot pushdown the dedup with multiple fields");
-      }
-      return;
+    if (call.rels.length == 5) {
+      final CalciteLogicalIndexScan scan = call.rel(4);
+      apply(call, finalProject, numOfDedupFilter, projectWithWindow, scan);
+    } else {
+      throw new AssertionError(
+          String.format(
+              "The length of rels should be %s but got %s",
+              this.operands.size(), call.rels.length));
     }
-    final List<String> fieldNameList = projectWithWindow.getInput().getRowType().getFieldNames();
-    List<Integer> selectColumns = PlanUtils.getSelectColumns(windows.getFirst().partitionKeys);
-    String fieldName = fieldNameList.get(selectColumns.getFirst());
+  }

-    CalciteLogicalIndexScan newScan = scan.pushDownCollapse(finalOutput, fieldName);
-    if (newScan != null) {
-      call.transformTo(newScan);
+  protected void apply(
+      RelOptRuleCall call,
+      LogicalProject finalProject,
+      LogicalFilter numOfDedupFilter,
+      LogicalProject projectWithWindow,
+      CalciteLogicalIndexScan scan) {
+    List<RexWindow> windows = PlanUtils.getRexWindowFromProject(projectWithWindow);
+    if (windows.size() != 1) {
+      return;
     }
-  }

-  private static boolean validFilter(LogicalFilter filter) {
-    if (filter.getCondition().getKind() != SqlKind.LESS_THAN_OR_EQUAL) {
-      return false;
+    List<RexNode> dedupColumns = windows.get(0).partitionKeys;
+    if (dedupColumns.stream()
+        .filter(rex -> rex.isA(SqlKind.INPUT_REF))
+        .anyMatch(rex -> rex.getType().getSqlTypeName() == SqlTypeName.MAP)) {
+      LOG.debug("Cannot push down the dedup since the dedup fields contain a MAP type");
+      // TODO https://github.com/opensearch-project/sql/issues/4564
+      return;
     }
-    List<RexNode> operandsOfCondition = ((RexCall) filter.getCondition()).getOperands();
-    RexNode leftOperand = operandsOfCondition.getFirst();
-    if (!(leftOperand instanceof RexInputRef ref)) {
+    if (projectWithWindow.getProjects().stream()
+        .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER))
+        .filter(Predicate.not(dedupColumns::contains))
+        .anyMatch(rex -> !rex.isA(SqlKind.INPUT_REF))) {
+      // TODO fall back to the approach of Collapse search
+      // | eval new_age = age + 1 | fields gender, new_age | dedup 1 gender
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Cannot pushdown the dedup since the left operand is not RexInputRef");
+        LOG.debug(
+            "Cannot push down the dedup since the final outputs contain a column which is not"
+                + " included in the table schema");
       }
-      return false;
+      return;
     }
-    String referenceName = filter.getRowType().getFieldNames().get(ref.getIndex());
-    if (!referenceName.equals(ROW_NUMBER_COLUMN_FOR_DEDUP)) {
+
+    List<RexNode> rexCallsExceptWindow =
+        projectWithWindow.getProjects().stream()
+            .filter(rex -> !rex.isA(SqlKind.ROW_NUMBER))
+            .filter(rex -> rex instanceof RexCall)
+            .toList();
+    if (!rexCallsExceptWindow.isEmpty()
+        && dedupColumnsContainRexCall(rexCallsExceptWindow, dedupColumns)) {
+      // TODO https://github.com/opensearch-project/sql/issues/4789
+      // | eval new_gender = lower(gender) | fields new_gender, age | dedup 1 new_gender
       if (LOG.isDebugEnabled()) {
-        LOG.debug(
-            "Cannot pushdown the dedup since the left operand is not {}",
-            ROW_NUMBER_COLUMN_FOR_DEDUP);
+        LOG.debug("Cannot push down the dedup since the dedup columns contain a RexCall");
       }
-      return false;
+      return;
     }
-    RexNode rightOperand = operandsOfCondition.getLast();
-    if (!(rightOperand instanceof RexLiteral numLiteral)) {
-      if
(LOG.isDebugEnabled()) {
-        LOG.debug("Cannot pushdown the dedup since the right operand is not RexLiteral");
       }
-      return false;
+
+    // The condition must be of the form row_number <= number
+    assert numOfDedupFilter.getCondition().isA(SqlKind.LESS_THAN_OR_EQUAL);
+    RexLiteral literal =
+        (RexLiteral) ((RexCall) numOfDedupFilter.getCondition()).getOperands().getLast();
+    Integer dedupNumber = literal.getValueAs(Integer.class);
+
+    // We convert the dedup pushdown to composite aggregate + top_hits:
+    //   Aggregate(literalAgg(dedupNumber), groups)
+    //   +- Project(groups, remaining)
+    //      +- Scan
+    // Step 1: Initialize a RelBuilder to build the aggregate by pushing Scan and Project
+    RelBuilder relBuilder = call.builder();
+    relBuilder.push(scan);
+    // To baseline the rowType, merge the fields() with the projects of projectWithWindow
+    List<RexNode> mergedRexList = new ArrayList<>();
+    List<String> mergedFieldNames = new ArrayList<>();
+    List<RexNode> builderFields = relBuilder.fields();
+    List<RexNode> projectFields = projectWithWindow.getProjects();
+    List<String> builderFieldNames = relBuilder.peek().getRowType().getFieldNames();
+    List<String> projectFieldNames = projectWithWindow.getRowType().getFieldNames();
+
+    // Add existing fields with proper names
+    // For the rename case: source = t | rename old as new | dedup new
+    for (RexNode field : builderFields) {
+      mergedRexList.add(field);
+      int projectIndex = projectFields.indexOf(field);
+      if (projectIndex >= 0) {
+        mergedFieldNames.add(projectFieldNames.get(projectIndex));
+      } else {
+        mergedFieldNames.add(builderFieldNames.get(builderFields.indexOf(field)));
       }
-    Integer num = numLiteral.getValueAs(Integer.class);
-    if (num == null || num > 1) {
-      // TODO leverage inner_hits for num > 1
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Cannot pushdown the dedup since number of duplicate events is larger than 1");
+    }
+    // Append new fields from the project (excluding ROW_NUMBER and duplicates)
+    for (RexNode field : projectFields) {
+      if (!field.isA(SqlKind.ROW_NUMBER) && !builderFields.contains(field)) {
+        mergedRexList.add(field);
+        mergedFieldNames.add(field.toString());
       }
-      return false;
+    }
+    // Force add the project
+    relBuilder.project(mergedRexList, mergedFieldNames, true);
+    LogicalProject baseline = (LogicalProject) relBuilder.peek();
+    Mapping mappingForDedupColumns =
+        PlanUtils.mapping(dedupColumns, relBuilder.peek().getRowType());
+
+    // Step 2: Push a Project whose group columns come first, followed by the remaining
+    // finalOutput columns
+    List<RexNode> reordered = new ArrayList<>(PlanUtils.getInputRefs(dedupColumns));
+    baseline.getProjects().stream()
+        .filter(Predicate.not(dedupColumns::contains))
+        .forEach(reordered::add);
+    relBuilder.project(reordered);
+    // childProject includes the full list of finalOutput columns
+    LogicalProject childProject = (LogicalProject) relBuilder.peek();
+
+    // Step 3: Push an Aggregate
+    // We push down a LITERAL_AGG carrying dedupNumber to convert the dedup command to an
+    // aggregate:
+    // (1) Pass the dedupNumber to AggregateAnalyzer.processAggregateCalls()
+    // (2) Distinguish it from an optimization operator and a user-defined aggregator.
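+    //     For example (illustrative): `... | dedup 2 gender` arrives here as `row_number <= 2`,
+    //     and is rewritten to Aggregate(groupKey=[gender], LITERAL_AGG(2)) over the reordered
+    //     Project.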
+    //     (LITERAL_AGG is normally used in optimization, see {@link SqlKind#LITERAL_AGG})
+    final List<RexNode> newDedupColumns = RexUtil.apply(mappingForDedupColumns, dedupColumns);
+    relBuilder.aggregate(relBuilder.groupKey(newDedupColumns), relBuilder.literalAgg(dedupNumber));
+    // Add the bucket_nullable = false hint
+    PlanUtils.addIgnoreNullBucketHintToAggregate(relBuilder);
+    // Peek the aggregate after the hint has been added
+    LogicalAggregate aggregate = (LogicalAggregate) relBuilder.build();
+
+    CalciteLogicalIndexScan newScan =
+        (CalciteLogicalIndexScan) scan.pushDownAggregate(aggregate, childProject);
+    if (newScan != null) {
+      // Reorder back to the original column order
+      call.transformTo(newScan.copyWithNewSchema(finalProject.getRowType()));
+    }
+  }
+
+  private static boolean dedupColumnsContainRexCall(
+      List<RexNode> calls, List<RexNode> dedupColumns) {
+    List<Integer> dedupColumnIndicesFromCall =
+        PlanUtils.getSelectColumns(calls).stream().distinct().toList();
+    List<Integer> dedupColumnsIndicesFromPartitionKeys =
+        PlanUtils.getSelectColumns(dedupColumns).stream().distinct().toList();
+    return dedupColumnsIndicesFromPartitionKeys.stream()
+        .anyMatch(dedupColumnIndicesFromCall::contains);
   }

-  /**
-   * Match fixed pattern:
    - * LogicalProject(remove _row_number_dedup_)
    - * LogicalFilter(condition=[<=($1, numOfDedup)])
    - * LogicalProject(..., _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])
    - * LogicalFilter(condition=[IS NOT NULL($0)])
    - */ @Value.Immutable public interface Config extends OpenSearchRuleConfig { + // Can only push the case with KEEPEMPTY=false: + // +- LogicalProject(no _row_number_dedup_) + // +- LogicalFilter(condition contains _row_number_dedup_) + // +- LogicalProject(contains _row_number_dedup_) + // +- LogicalFilter(condition=IS NOT NULL(dedupColumn))" + // +- CalciteLogicalIndexScan Config DEFAULT = ImmutableDedupPushdownRule.Config.builder() .build() + .withDescription("Dedup-to-Aggregate") .withOperandSupplier( b0 -> b0.operand(LogicalProject.class) + .predicate(Predicate.not(PlanUtils::containsRowNumberDedup)) .oneInput( b1 -> b1.operand(LogicalFilter.class) - .predicate(DedupPushdownRule::validFilter) + .predicate(Config::validDedupNumberChecker) .oneInput( b2 -> b2.operand(LogicalProject.class) .predicate(PlanUtils::containsRowNumberDedup) .oneInput( b3 -> - b3.operand(CalciteLogicalIndexScan.class) + b3.operand(LogicalFilter.class) .predicate( - Predicate.not( - AbstractCalciteIndexScan - ::isLimitPushed) - .and( - AbstractCalciteIndexScan - ::noAggregatePushed)) - .noInputs())))); + PlanUtils + ::mayBeFilterFromBucketNonNull) + .oneInput( + b4 -> + b4.operand( + CalciteLogicalIndexScan + .class) + .predicate( + Predicate.not( + AbstractCalciteIndexScan + ::isLimitPushed) + .and( + AbstractCalciteIndexScan + ::noAggregatePushed) + .and( + AbstractCalciteIndexScan + ::isProjectPushed)) + .noInputs()))))); + // +- LogicalProject(no _row_number_dedup_) + // +- LogicalFilter(condition contains _row_number_dedup_) + // +- LogicalProject(contains _row_number_dedup_) + // +- LogicalFilter(condition IS NOT NULL(dedupColumn)) + // +- LogicalProject(dedupColumn is call) + // +- CalciteLogicalIndexScan + Config DEDUP_EXPR = + ImmutableDedupPushdownRule.Config.builder() + .build() + .withDescription("DedupWithExpression-to-Aggregate") + .withOperandSupplier( + b0 -> + b0.operand(LogicalProject.class) + .predicate(Predicate.not(PlanUtils::containsRowNumberDedup)) + .oneInput( + b1 -> + b1.operand(LogicalFilter.class) + .predicate(Config::validDedupNumberChecker) + .oneInput( + b2 -> + b2.operand(LogicalProject.class) + .predicate(PlanUtils::containsRowNumberDedup) + .oneInput( + b3 -> + b3.operand(LogicalFilter.class) + .predicate(Config::isNotNull) + .oneInput( + b4 -> + b4.operand(LogicalProject.class) + .predicate( + PlanUtils + ::containsRexCall) + .oneInput( + b5 -> + b5.operand( + CalciteLogicalIndexScan + .class) + .predicate( + Predicate + .not( + AbstractCalciteIndexScan + ::isLimitPushed) + .and( + AbstractCalciteIndexScan + ::noAggregatePushed) + .and( + AbstractCalciteIndexScan + ::isProjectPushed)) + .noInputs())))))); @Override default DedupPushdownRule toRule() { return new DedupPushdownRule(this); } + + private static boolean validDedupNumberChecker(LogicalFilter filter) { + return filter.getCondition().isA(SqlKind.LESS_THAN_OR_EQUAL) + && PlanUtils.containsRowNumberDedup(filter); + } + + private static boolean isNotNull(LogicalFilter filter) { + return filter.getCondition().isA(SqlKind.IS_NOT_NULL); + } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java index a09be09a34b..2034eb1c6d8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java @@ -204,7 +204,7 @@ public interface Config extends OpenSearchRuleConfig { b1.operand(EnumerableProject.class) .predicate( Predicate.not(Project::containsOver) - .and(PlanUtils::projectContainsExpr)) + .and(PlanUtils::containsRexCall)) .anyInputs())); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index c991b1964e9..ef55868b202 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -29,6 +29,8 @@ public class OpenSearchIndexRules { SortIndexScanRule.Config.DEFAULT.toRule(); private static final DedupPushdownRule DEDUP_PUSH_DOWN = DedupPushdownRule.Config.DEFAULT.toRule(); + private static final DedupPushdownRule DEDUP_EXPR_PUSH_DOWN = + DedupPushdownRule.Config.DEDUP_EXPR.toRule(); private static final SortProjectExprTransposeRule SORT_PROJECT_EXPR_TRANSPOSE = SortProjectExprTransposeRule.Config.DEFAULT.toRule(); private static final ExpandCollationOnProjectExprRule EXPAND_COLLATION_ON_PROJECT_EXPR = @@ -54,8 +56,9 @@ public class OpenSearchIndexRules { BUCKET_NON_NULL_AGG_WITH_UDF_INDEX_SCAN, LIMIT_INDEX_SCAN, SORT_INDEX_SCAN, - // TODO enable if https://github.com/opensearch-project/OpenSearch/issues/3725 resolved - // DEDUP_PUSH_DOWN, + DEDUP_PUSH_DOWN, + // TODO https://github.com/opensearch-project/sql/issues/4789 + // DEDUP_EXPR_PUSH_DOWN, SORT_PROJECT_EXPR_TRANSPOSE, SORT_AGGREGATION_METRICS_RULE, RARE_TOP_PUSH_DOWN, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java index 9f27bfd3954..fe0fae8e64a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java @@ -134,7 +134,7 @@ public interface Config extends OpenSearchRuleConfig { b1.operand(LogicalProject.class) .predicate( Predicate.not(LogicalProject::containsOver) - .and(PlanUtils::projectContainsExpr)) + .and(PlanUtils::containsRexCall)) .anyInputs())); @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 2abfb5a401b..b52d4a06820 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -73,14 +73,17 @@ import org.opensearch.search.aggregations.metrics.ValueCountAggregationBuilder; import org.opensearch.search.aggregations.support.ValueType; import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; +import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import 
org.opensearch.sql.expression.function.BuiltinFunctionName;
 import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
 import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression;
+import org.opensearch.sql.opensearch.request.PredicateAnalyzer.ScriptQueryExpression;
 import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser;
 import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser;
 import org.opensearch.sql.opensearch.response.agg.CountAsTotalHitsParser;
@@ -146,14 +149,9 @@ > T build(RexNode node, T sourceBuilde
   T build(RexNode node, Function fieldBuilder, Function scriptBuilder) {
     if (node == null) return fieldBuilder.apply(METADATA_FIELD);
     else if (node instanceof RexInputRef ref) {
-      return fieldBuilder.apply(
-          new NamedFieldExpression(ref.getIndex(), rowType.getFieldNames(), fieldTypes)
-              .getReferenceForTermQuery());
+      return fieldBuilder.apply(inferNamedField(node).getReferenceForTermQuery());
     } else if (node instanceof RexCall || node instanceof RexLiteral) {
-      return scriptBuilder.apply(
-          (new PredicateAnalyzer.ScriptQueryExpression(
-                  node, rowType, fieldTypes, cluster, Collections.emptyMap()))
-              .getScript());
+      return scriptBuilder.apply(inferScript(node).getScript());
     }
     throw new IllegalStateException(
         String.format("Metric aggregation doesn't support RexNode %s", node));
@@ -167,6 +165,15 @@ NamedFieldExpression inferNamedField(RexNode node) {
         String.format("Cannot infer field name from RexNode %s", node));
   }

+  ScriptQueryExpression inferScript(RexNode node) {
+    if (node instanceof RexCall || node instanceof RexLiteral) {
+      return new ScriptQueryExpression(
+          node, rowType, fieldTypes, cluster, Collections.emptyMap());
+    }
+    throw new IllegalStateException(
+        String.format("Metric aggregation doesn't support RexNode %s", node));
+  }
+
   T inferValue(RexNode node, Class clazz) {
     if (node instanceof RexLiteral literal) {
       return literal.getValueAs(clazz);
@@ -341,10 +348,21 @@ private static Pair> processAggregateCalls(
     return Pair.of(metricBuilder, metricParserList);
   }

+  /**
+   * Converts aggregate arguments through the child Project. Normally this just returns the rex
+   * nodes of the Project that are referenced by the aggCall's argument list. If the aggCall is a
+   * LITERAL_AGG, it returns all rex nodes of the Project except window functions.
+   *
+   * @param aggCall the aggregate call
+   * @param project the project
+   * @return the converted RexNode list
+   */
   private static List<RexNode> convertAggArgThroughProject(AggregateCall aggCall, Project project) {
     return project == null
         ? List.of()
-        : aggCall.getArgList().stream().map(project.getProjects()::get).toList();
+        : PlanUtils.getObjectFromLiteralAgg(aggCall) != null
+            ?
project.getProjects().stream().filter(rex -> !rex.isA(SqlKind.ROW_NUMBER)).toList() + : aggCall.getArgList().stream().map(project.getProjects()::get).toList(); } private static Pair createAggregationBuilderAndParser( @@ -417,7 +435,7 @@ private static Pair createRegularAggregation( .sort( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.ASC), - new TopHitsParser(aggFieldName, true)); + new TopHitsParser(aggFieldName, true, false)); } } case MAX -> { @@ -436,7 +454,7 @@ private static Pair createRegularAggregation( .sort( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery(), SortOrder.DESC), - new TopHitsParser(aggFieldName, true)); + new TopHitsParser(aggFieldName, true, false)); } } case VAR_SAMP -> @@ -486,7 +504,7 @@ yield switch (functionName) { helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()) .size(helper.inferValue(args.getLast(), Integer.class)) .from(0), - new TopHitsParser(aggFieldName)); + new TopHitsParser(aggFieldName, false, true)); case FIRST -> { TopHitsAggregationBuilder firstBuilder = AggregationBuilders.topHits(aggFieldName).size(1).from(0); @@ -494,7 +512,7 @@ yield switch (functionName) { firstBuilder.fetchField( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()); } - yield Pair.of(firstBuilder, new TopHitsParser(aggFieldName, true)); + yield Pair.of(firstBuilder, new TopHitsParser(aggFieldName, true, false)); } case LAST -> { TopHitsAggregationBuilder lastBuilder = @@ -506,7 +524,7 @@ yield switch (functionName) { lastBuilder.fetchField( helper.inferNamedField(args.getFirst()).getReferenceForTermQuery()); } - yield Pair.of(lastBuilder, new TopHitsParser(aggFieldName, true)); + yield Pair.of(lastBuilder, new TopHitsParser(aggFieldName, true, false)); } case PERCENTILE_APPROX -> { PercentilesAggregationBuilder aggBuilder = @@ -530,6 +548,38 @@ yield switch (functionName) { String.format("Unsupported push-down aggregator %s", aggCall.getAggregation())); }; } + case LITERAL_AGG -> { + RexLiteral literal = PlanUtils.getObjectFromLiteralAgg(aggCall); + if (literal == null || !(literal.getValue() instanceof Number)) { + throw new AggregateAnalyzer.AggregateAnalyzerException( + String.format("Unsupported push-down aggregator %s", aggCall.getAggregation())); + } + Integer dedupNumber = literal.getValueAs(Integer.class); + // Disable fetchSource since TopHitsParser only parses fetchField currently. 
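+          // Illustrative sketch of the request this builds (keys per the OpenSearch top_hits
+          // DSL):
+          //   "<aggFieldName>": { "top_hits": { "from": 0, "size": <dedupNumber>,
+          //     "_source": { "includes": [ <distinct dedup fields> ] },
+          //     "script_fields": { <one entry per RexCall/RexLiteral argument> } } }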
+        TopHitsAggregationBuilder topHitsAggregationBuilder =
+            AggregationBuilders.topHits(aggFieldName).from(0).size(dedupNumber);
+        List<String> sources = new ArrayList<>();
+        List<SearchSourceBuilder.ScriptField> scripts = new ArrayList<>();
+        args.forEach(
+            rex -> {
+              if (rex instanceof RexInputRef) {
+                sources.add(helper.inferNamedField(rex).getReference());
+              } else if (rex instanceof RexCall || rex instanceof RexLiteral) {
+                scripts.add(
+                    new SearchSourceBuilder.ScriptField(
+                        rex.toString(), helper.inferScript(rex).getScript(), false));
+              } else {
+                throw new AggregateAnalyzer.AggregateAnalyzerException(
+                    String.format(
+                        "Unsupported push-down aggregator %s due to rex kind %s",
+                        aggCall.getAggregation(), rex.getKind()));
+              }
+            });
+        topHitsAggregationBuilder.fetchSource(
+            sources.stream().distinct().toArray(String[]::new), new String[0]);
+        topHitsAggregationBuilder.scriptFields(scripts);
+        yield Pair.of(topHitsAggregationBuilder, new TopHitsParser(aggFieldName, false, false));
+      }
       default ->
           throw new AggregateAnalyzer.AggregateAnalyzerException(
               String.format("unsupported aggregator %s", aggCall.getAggregation()));
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java
index 7a0a18c79ac..5c3ecd11e49 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java
@@ -73,6 +73,8 @@ public class OpenSearchRequestBuilder {
   @ToString.Exclude private final Settings settings;

+  @ToString.Exclude private boolean topHitsAgg = false;
+
   public static class PushDownUnSupportedException extends RuntimeException {
     public PushDownUnSupportedException(String message) {
       super(message);
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java
index 4f3e37ac4e6..e35c7efcf7b 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java
@@ -229,7 +229,7 @@ private Iterator handleAggregationResponse() {
         for (Map.Entry value : entry.entrySet()) {
           builder.put(
               value.getKey(),
-              exprValueFactory.construct(value.getKey(), value.getValue(), false));
+              exprValueFactory.construct(value.getKey(), value.getValue(), true));
         }
         return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build());
       })
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java
index 55dacd7081c..597da4e4967 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/ArgMaxMinParser.java
@@ -6,6 +6,8 @@
 package org.opensearch.sql.opensearch.response.agg;

 import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import lombok.Value;
 import org.opensearch.search.SearchHit;
@@ -19,20 +21,22 @@ public class ArgMaxMinParser implements MetricParser {
   String name;

   @Override
-  public Map<String, Object> parse(Aggregation agg) {
+  public List<Map<String, Object>> parse(Aggregation agg) {
     TopHits topHits = (TopHits) agg;
     SearchHit[] hits = topHits.getHits().getHits();
     if (hits.length
== 0) { - return Collections.singletonMap(agg.getName(), null); + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), null))); } // Get value from fields (fetchField) if (hits[0].getFields() != null && !hits[0].getFields().isEmpty()) { Object value = hits[0].getFields().values().iterator().next().getValue(); - return Collections.singletonMap(agg.getName(), value); + return Collections.singletonList(Collections.singletonMap(agg.getName(), value)); + } else { + return Collections.singletonList( + new HashMap<>(Collections.singletonMap(agg.getName(), null))); } - - return Collections.singletonMap(agg.getName(), null); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index db6e4eef248..761aef4fd98 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -6,6 +6,7 @@ package org.opensearch.sql.opensearch.response.agg; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -84,9 +85,11 @@ private boolean isLeafAgg(Aggregations aggregations) { } private List> parseLeafAgg(Aggregations aggregations, long docCount) { - Map resultMap = metricsParser.parse(aggregations); - countAggNameList.forEach(countAggName -> resultMap.put(countAggName, docCount)); - return List.of(resultMap); + List> resultMapList = metricsParser.parse(aggregations); + List> maps = + resultMapList.isEmpty() ? List.of(new HashMap<>()) : resultMapList; + countAggNameList.forEach(countAggName -> maps.forEach(map -> map.put(countAggName, docCount))); + return maps; } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java index 57941311d44..4646ffa9d3e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/CompositeAggregationParser.java @@ -5,11 +5,12 @@ package org.opensearch.sql.opensearch.response.agg; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; import org.opensearch.search.SearchHits; @@ -26,8 +27,6 @@ public class CompositeAggregationParser implements OpenSearchAggregationResponseParser { private final MetricParserHelper metricsParser; - // countAggNameList dedicated the list of count aggregations which are filled by doc_count - private List countAggNameList = List.of(); public CompositeAggregationParser(MetricParser... 
metricParserList) { metricsParser = new MetricParserHelper(Arrays.asList(metricParserList)); @@ -37,25 +36,17 @@ public CompositeAggregationParser(List metricParserList) { metricsParser = new MetricParserHelper(metricParserList); } - /** CompositeAggregationParser with count aggregation name list, used in v3 */ - public CompositeAggregationParser( - List metricParserList, List countAggNameList) { - metricsParser = new MetricParserHelper(metricParserList); - this.countAggNameList = countAggNameList; - } - @Override public List> parse(Aggregations aggregations) { return ((CompositeAggregation) aggregations.asList().get(0)) - .getBuckets().stream().map(this::parse).collect(Collectors.toList()); + .getBuckets().stream().map(this::parse).flatMap(Collection::stream).toList(); } - private Map parse(CompositeAggregation.Bucket bucket) { - Map resultMap = new HashMap<>(); - resultMap.putAll(bucket.getKey()); - resultMap.putAll(metricsParser.parse(bucket.getAggregations())); - countAggNameList.forEach(name -> resultMap.put(name, bucket.getDocCount())); - return resultMap; + private List> parse(CompositeAggregation.Bucket bucket) { + List> resultMapList = new ArrayList<>(); + resultMapList.add(new HashMap<>(bucket.getKey())); + resultMapList.addAll(metricsParser.parse(bucket.getAggregations())); + return resultMapList; } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java index 95571df30a7..de9a4a2fbd0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FilterParser.java @@ -5,6 +5,7 @@ package org.opensearch.sql.opensearch.response.agg; +import java.util.List; import java.util.Map; import lombok.Builder; import lombok.EqualsAndHashCode; @@ -25,7 +26,7 @@ public class FilterParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation aggregations) { + public List> parse(Aggregation aggregations) { return metricsParser.parse(((Filter) aggregations).getAggregations().asList().get(0)); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java index 04781d2a94a..db163bcd97a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParser.java @@ -5,6 +5,7 @@ package org.opensearch.sql.opensearch.response.agg; +import java.util.List; import java.util.Map; import org.opensearch.search.aggregations.Aggregation; @@ -20,5 +21,5 @@ public interface MetricParser { * @param aggregation {@link Aggregation} * @return the map between metric name and metric value. 
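   *     (After this change the parser returns a list of such maps: single-value parsers emit one
   *     map, while top_hits-based parsers may emit one map per hit.)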
*/ - Map parse(Aggregation aggregation); + List> parse(Aggregation aggregation); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java index 8886668abb0..cc85df5bf7b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/MetricParserHelper.java @@ -5,7 +5,8 @@ package org.opensearch.sql.opensearch.response.agg; -import java.util.HashMap; +import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -45,18 +46,26 @@ public MetricParserHelper(List metricParserList, List coun * @param aggregations {@link Aggregations} * @return the map between metric name and metric value. */ - public Map parse(Aggregations aggregations) { - Map resultMap = new HashMap<>(); + public List> parse(Aggregations aggregations) { + List> resultMapList = new ArrayList<>(); + Map mergeMap = new LinkedHashMap<>(); for (Aggregation aggregation : aggregations) { - if (metricParserMap.containsKey(aggregation.getName())) { - resultMap.putAll(metricParserMap.get(aggregation.getName()).parse(aggregation)); - } else { + MetricParser parser = metricParserMap.get(aggregation.getName()); + if (parser == null) { throw new RuntimeException( StringUtils.format( "couldn't parse field %s in aggregation response", aggregation.getName())); } + List> resList = parser.parse(aggregation); + if (resList.size() == 1) { // single value parser + mergeMap.putAll(resList.get(0)); + } else if (resList.size() > 1) { // top_hits parser + resultMapList.addAll(resList); + } + } + if (!mergeMap.isEmpty()) { + resultMapList.add(mergeMap); } - // countAggNameList.forEach(name -> resultMap.put(name, bucket.getDocCount())); - return resultMap; + return resultMapList; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java index df8dcdd4ce9..a43a0251a48 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/NoBucketAggregationParser.java @@ -6,7 +6,6 @@ package org.opensearch.sql.opensearch.response.agg; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Map; import lombok.Getter; @@ -29,7 +28,7 @@ public NoBucketAggregationParser(List metricParserList) { @Override public List> parse(Aggregations aggregations) { - return Collections.singletonList(metricsParser.parse(aggregations)); + return metricsParser.parse(aggregations); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java index c9d78a94418..ce538f1405c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/PercentilesParser.java @@ -7,6 +7,8 @@ import com.google.common.collect.Streams; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import 
lombok.EqualsAndHashCode; @@ -23,14 +25,16 @@ public class PercentilesParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), - // TODO a better implementation here is providing a class `MultiValueParser` - // similar to `SingleValueParser`. However, there is no method `values()` available - // in `org.opensearch.search.aggregations.metrics.MultiValue`. - Streams.stream(((Percentiles) agg).iterator()) - .map(Percentile::getValue) - .collect(Collectors.toList())); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), + // TODO a better implementation here is providing a class `MultiValueParser` + // similar to `SingleValueParser`. However, there is no method `values()` available + // in `org.opensearch.search.aggregations.metrics.MultiValue`. + Streams.stream(((Percentiles) agg).iterator()) + .map(Percentile::getValue) + .collect(Collectors.toList())))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java index 9665d863fc7..ef47d9d629a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SinglePercentileParser.java @@ -7,6 +7,8 @@ import com.google.common.collect.Streams; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -21,12 +23,14 @@ public class SinglePercentileParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), - // TODO `Percentiles` implements interface - // `org.opensearch.search.aggregations.metrics.MultiValue`, but there is not - // method `values()` available in this interface. So we - Streams.stream(((Percentiles) agg).iterator()).findFirst().get().getValue()); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), + // TODO `Percentiles` implements interface + // `org.opensearch.search.aggregations.metrics.MultiValue`, but there is not + // method `values()` available in this interface. 
So we + Streams.stream(((Percentiles) agg).iterator()).findFirst().get().getValue()))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java index 5487f9ca605..f2a3ab785d4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/SingleValueParser.java @@ -8,6 +8,8 @@ import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanInfValue; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -23,8 +25,11 @@ public class SingleValueParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), handleNanInfValue(((NumericMetricsAggregation.SingleValue) agg).value())); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), + handleNanInfValue(((NumericMetricsAggregation.SingleValue) agg).value())))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java index ccda391c5eb..d1e044fdd87 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/StatsParser.java @@ -8,6 +8,8 @@ import static org.opensearch.sql.opensearch.response.agg.Utils.handleNanInfValue; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.function.Function; import lombok.EqualsAndHashCode; @@ -26,8 +28,10 @@ public class StatsParser implements MetricParser { @Getter private final String name; @Override - public Map parse(Aggregation agg) { - return Collections.singletonMap( - agg.getName(), handleNanInfValue(valueExtractor.apply((ExtendedStats) agg))); + public List> parse(Aggregation agg) { + return Collections.singletonList( + new HashMap<>( + Collections.singletonMap( + agg.getName(), handleNanInfValue(valueExtractor.apply((ExtendedStats) agg))))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java index c9d9cf61d9e..0d662fcc385 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/TopHitsParser.java @@ -7,10 +7,15 @@ import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; +import org.opensearch.common.document.DocumentField; import org.opensearch.search.SearchHit; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.metrics.TopHits; @@ -21,45 +26,72 @@ public class TopHitsParser implements MetricParser { @Getter private final String name; private final boolean returnSingleValue; + private final boolean returnMergeValue; - public 
TopHitsParser(String name) {
-    this.name = name;
-    this.returnSingleValue = false;
-  }
-
-  public TopHitsParser(String name, boolean returnSingleValue) {
+  public TopHitsParser(String name, boolean returnSingleValue, boolean returnMergeValue) {
     this.name = name;
     this.returnSingleValue = returnSingleValue;
+    this.returnMergeValue = returnMergeValue;
   }

   @Override
-  public Map<String, Object> parse(Aggregation agg) {
+  public List<Map<String, Object>> parse(Aggregation agg) {
     TopHits topHits = (TopHits) agg;
     SearchHit[] hits = topHits.getHits().getHits();
     if (hits.length == 0) {
-      return Collections.singletonMap(agg.getName(), null);
+      return Collections.singletonList(
+          new HashMap<>(Collections.singletonMap(agg.getName(), null)));
     }

     if (returnSingleValue) {
+      if (hits[0].getFields() == null || hits[0].getFields().isEmpty()) {
+        return Collections.singletonList(
+            new HashMap<>(Collections.singletonMap(agg.getName(), null)));
+      }
       // Extract the single value from the first (and only) hit from fields (fetchField)
-      if (hits[0].getFields() != null && !hits[0].getFields().isEmpty()) {
-        Object value = hits[0].getFields().values().iterator().next().getValue();
-        return Collections.singletonMap(agg.getName(), value);
+      Object value = hits[0].getFields().values().iterator().next().getValue();
+      return Collections.singletonList(
+          new HashMap<>(Collections.singletonMap(agg.getName(), value)));
+    } else if (returnMergeValue) {
+      if (hits[0].getFields() == null || hits[0].getFields().isEmpty()) {
+        return Collections.singletonList(
+            new HashMap<>(Collections.singletonMap(agg.getName(), Collections.emptyList())));
       }
-      return Collections.singletonMap(agg.getName(), null);
-    } else { // Return all values as a list from fields (fetchField)
-      if (hits[0].getFields() != null && !hits[0].getFields().isEmpty()) {
-        return Collections.singletonMap(
-            agg.getName(),
-            Arrays.stream(hits)
-                .flatMap(h -> h.getFields().values().stream())
-                .map(f -> f.getValue())
-                .filter(v -> v != null) // Filter out null values
-                .collect(Collectors.toList()));
-      }
-      return Collections.singletonMap(agg.getName(), Collections.emptyList());
+      return Collections.singletonList(
+          Collections.singletonMap(
+              agg.getName(),
+              Arrays.stream(hits)
+                  .flatMap(h -> h.getFields().values().stream())
+                  .map(DocumentField::getValue)
+                  .filter(Objects::nonNull) // Filter out null values
+                  .collect(Collectors.toList())));
+    } else {
+      // "hits": {
+      //   "hits": [
+      //     {
+      //       "_source": {
+      //         "name": "A",
+      //         "category": "X"
+      //       }
+      //     },
+      //     {
+      //       "_source": {
+      //         "name": "A",
+      //         "category": "Y"
+      //       }
+      //     }
+      //   ]
+      // }
+      // converts to:
+      // List[
+      //   LinkedHashMap["name" -> "A", "category" -> "X"],
+      //   LinkedHashMap["name" -> "A", "category" -> "Y"]
+      // ]
+      return Arrays.stream(hits)
+          .<Map<String, Object>>map(hit -> new LinkedHashMap<>(hit.getSourceAsMap()))
+          .toList();
     }
   }
 }
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java
index 11421fca0a1..29b240613b5 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java
@@ -125,7 +125,6 @@ public double estimateRowCount(RelMetadataQuery mq) {
       case SORT_AGG_METRICS -> NumberUtil.min(rowCount, osIndex.getBucketSize().doubleValue());
         // Refer the org.apache.calcite.rel.metadata.RelMdRowCount
-      case COLLAPSE -> rowCount /
10; case FILTER, SCRIPT -> NumberUtil.multiply( rowCount, @@ -182,11 +181,6 @@ public double estimateRowCount(RelMetadataQuery mq) { sortKeys.stream().filter(digest -> digest.getExpression() != null).count(); dCpu += NumberUtil.multiply(dRows, 1.1 * complexExprCount); } - // Refer the org.apache.calcite.rel.metadata.RelMdRowCount.getRowCount(Aggregate rel,...) - case COLLAPSE -> { - dRows = dRows / 10; - dCpu += dRows; - } // Ignore cost the primitive filter but it will affect the rows count. case FILTER -> dRows = @@ -430,4 +424,12 @@ public boolean isMetricsOrderPushed() { public boolean isTopKPushed() { return this.getPushDownContext().isTopKPushed(); } + + public boolean isScriptPushed() { + return this.getPushDownContext().isScriptPushed(); + } + + public boolean isProjectPushed() { + return this.getPushDownContext().isProjectPushed(); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index bf79f803544..2821aa037da 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -227,7 +227,7 @@ public CalciteLogicalIndexScan pushDownCollapse(Project finalOutput, String fiel } CalciteLogicalIndexScan newScan = this.copyWithNewSchema(finalOutput.getRowType()); newScan.pushDownContext.add( - PushDownType.COLLAPSE, + PushDownType.AGGREGATION, fieldName, (OSRequestBuilderAction) requestBuilder -> requestBuilder.pushDownCollapse(field)); return newScan; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 29700fd6606..16c81facace 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -36,6 +36,7 @@ public class PushDownContext extends AbstractCollection { private boolean isSortExprPushed = false; private boolean isTopKPushed = false; private boolean isRareTopPushed = false; + private boolean isScriptPushed = false; public PushDownContext(OpenSearchIndex osIndex) { this.osIndex = osIndex; @@ -130,6 +131,9 @@ public boolean add(PushDownOperation operation) { if (operation.type() == PushDownType.RARE_TOP) { isRareTopPushed = true; } + if (operation.type() == PushDownType.SCRIPT) { + isScriptPushed = true; + } return true; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java index 81927e9f8d6..c763808164d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownType.java @@ -12,8 +12,7 @@ public enum PushDownType { AGGREGATION, SORT, LIMIT, - SCRIPT, - COLLAPSE, + SCRIPT, // script in predicate SORT_AGG_METRICS, // convert composite aggregate to terms or multi-terms bucket aggregate RARE_TOP, // convert composite aggregate to nested aggregate SORT_EXPR diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java index 0f523d65341..0a189584af3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/MetricAggregationBuilder.java @@ -156,7 +156,7 @@ public Pair visitNamedAggregator( node.getArguments().get(1), condition, name, - new TopHitsParser(name)); + new TopHitsParser(name, false, true)); case "percentile": case "percentile_approx": return make( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index ec0e5d919b3..660744c8bb7 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -359,7 +359,8 @@ void analyze_firstAggregation() throws ExpressionNotAnalyzableException { .withAggCall(b -> b.aggregateCall(PPLBuiltinOperators.FIRST, b.field("a")).as("first_a")) .expectDslQuery( "[{\"first_a\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false}}}]") - .expectResponseParser(new MetricParserHelper(List.of(new TopHitsParser("first_a", true)))) + .expectResponseParser( + new MetricParserHelper(List.of(new TopHitsParser("first_a", true, false)))) .verify(); } @@ -369,7 +370,8 @@ void analyze_lastAggregation() throws ExpressionNotAnalyzableException { .withAggCall(b -> b.aggregateCall(PPLBuiltinOperators.LAST, b.field("b")).as("last_b")) .expectDslQuery( "[{\"last_b\":{\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"sort\":[{\"_doc\":{\"order\":\"desc\"}}]}}}]") - .expectResponseParser(new MetricParserHelper(List.of(new TopHitsParser("last_b", true)))) + .expectResponseParser( + new MetricParserHelper(List.of(new TopHitsParser("last_b", true, false)))) .verify(); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java index 5dc88ad5d64..7ba64eaa475 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java @@ -131,9 +131,9 @@ void two_bucket_one_metric_should_pass() { new CompositeAggregationParser(new SingleValueParser("avg")); assertThat( parse(parser, response), - containsInAnyOrder( - ImmutableMap.of("type", "cost", "region", "us", "avg", 20d), - ImmutableMap.of("type", "sale", "region", "uk", "avg", 130d))); + contains( + ImmutableMap.of("type", "cost", "region", "us"), ImmutableMap.of("avg", 20d), + ImmutableMap.of("type", "sale", "region", "uk"), ImmutableMap.of("avg", 130d))); } @Test @@ -296,10 +296,11 @@ void top_hits_aggregation_should_pass() { + " }\n" + "}"; OpenSearchAggregationResponseParser parser = - new CompositeAggregationParser(new TopHitsParser("take")); + new CompositeAggregationParser(new TopHitsParser("take", false, true)); assertThat( parse(parser, response), - contains(ImmutableMap.of("type", "take", "take", ImmutableList.of("m", "f")))); + contains( + 
ImmutableMap.of("type", "take"), ImmutableMap.of("take", ImmutableList.of("m", "f")))); } /** SELECT PERCENTILE(age, 50) FROM accounts. */ @@ -422,9 +423,11 @@ void two_bucket_one_metric_percentile_should_pass() { new SinglePercentileParser("percentile"), new SingleValueParser("max")); assertThat( parse(parser, response), - containsInAnyOrder( - ImmutableMap.of("type", "cost", "region", "us", "percentile", 40d), - ImmutableMap.of("type", "sale", "region", "uk", "percentile", 100d))); + contains( + ImmutableMap.of("type", "cost", "region", "us"), + ImmutableMap.of("percentile", 40d), + ImmutableMap.of("type", "sale", "region", "uk"), + ImmutableMap.of("percentile", 100d))); } /** SELECT PERCENTILES(age) FROM accounts. */ @@ -560,21 +563,11 @@ void two_bucket_percentiles_should_pass() { new CompositeAggregationParser(new PercentilesParser("percentiles")); assertThat( parse(parser, response), - containsInAnyOrder( - ImmutableMap.of( - "type", - "cost", - "region", - "us", - "percentiles", - List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)), - ImmutableMap.of( - "type", - "sale", - "region", - "uk", - "percentiles", - List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)))); + contains( + ImmutableMap.of("type", "cost", "region", "us"), + ImmutableMap.of("percentiles", List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)), + ImmutableMap.of("type", "sale", "region", "uk"), + ImmutableMap.of("percentiles", List.of(21.0, 27.0, 30.0, 35.0, 55.0, 58.0, 60.0)))); } public List> parse(OpenSearchAggregationResponseParser parser, String json) { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java index c67d7cfaa3e..021a64aad7d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/CalciteIndexScanCostTest.java @@ -191,19 +191,6 @@ void test_cost_on_sort_pushdown() { assertEquals(99000, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); } - @Test - void test_cost_on_collapse_pushdown() { - RelDataType relDataType = mock(RelDataType.class); - lenient().when(relDataType.getFieldList()).thenReturn(new MockFieldList(10)); - lenient().when(table.getRowType()).thenReturn(relDataType); - - CalciteLogicalIndexScan scan = new CalciteLogicalIndexScan(cluster, table, osIndex); - scan.getPushDownContext() - .add( - new PushDownOperation(PushDownType.COLLAPSE, null, (OSRequestBuilderAction) req -> {})); - assertEquals(9900, Objects.requireNonNull(scan.computeSelfCost(planner, mq)).getRows()); - } - @Test void test_cost_on_aggregate_pushdown() { RelDataType relDataType = mock(RelDataType.class); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 6cf3c91fe7b..0e14e5e6cbc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -184,4 +184,98 @@ public void testDedupKeepEmpty2() { + "WHERE `DEPTNO` IS NULL OR `JOB` IS NULL OR `_row_number_dedup_` <= 2"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testDedupExpr() { + String ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields EMPNO, ENAME, JOB, DEPTNO, NEW_DEPTNO |" + + " dedup 1 NEW_DEPTNO"; + RelNode root = 
getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$3], NEW_DEPTNO=[$4])\n" + + " LogicalFilter(condition=[<=($5, 1)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$3], NEW_DEPTNO=[$4]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4 ORDER BY $4)])\n" + + " LogicalFilter(condition=[IS NOT NULL($4)])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], DEPTNO=[$7]," + + " NEW_DEPTNO=[+($7, 1)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | fields EMPNO, ENAME, JOB, DEPTNO | eval NEW_DEPTNO = DEPTNO + 1 | dedup 1" + + " NEW_DEPTNO"; + root = getRelNode(ppl); + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | dedup 1" + + " JOB"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $3 ORDER BY $3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval NEW_DEPTNO = DEPTNO + 1 | fields NEW_DEPTNO, EMPNO, ENAME, JOB | sort" + + " NEW_DEPTNO | dedup 1 NEW_DEPTNO"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testRenameDedup() { + String ppl = + "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + + " NEW_DEPTNO, EMPNO, ENAME, JOB | dedup 1 NEW_DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + + " NEW_DEPTNO, EMPNO, ENAME, JOB | dedup 1 JOB"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $3 ORDER BY $3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " 
LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + ppl = + "source=EMP | eval TEMP_DEPTNO = DEPTNO + 1 | rename TEMP_DEPTNO as NEW_DEPTNO | fields" + + " NEW_DEPTNO, EMPNO, ENAME, JOB | sort NEW_DEPTNO | dedup 1 NEW_DEPTNO"; + root = getRelNode(ppl); + expectedLogical = + "LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3])\n" + + " LogicalFilter(condition=[<=($4, 1)])\n" + + " LogicalProject(NEW_DEPTNO=[$0], EMPNO=[$1], ENAME=[$2], JOB=[$3]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " LogicalFilter(condition=[IS NOT NULL($0)])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first])\n" + + " LogicalProject(NEW_DEPTNO=[+($7, 1)], EMPNO=[$0], ENAME=[$1], JOB=[$2])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java index ff230540c93..580de2c80fb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java @@ -1002,7 +1002,7 @@ public void testJoinWithFieldListMaxGreaterThanZero() { + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + " LogicalFilter(condition=[<=($3, 1)])\n" + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + " LogicalTableScan(table=[[scott, DEPT]])\n"; verifyLogical(root, expectedLogical); verifyResultCount(root, 14); @@ -1013,9 +1013,9 @@ public void testJoinWithFieldListMaxGreaterThanZero() { + "FROM `scott`.`EMP`\n" + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n" + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`" - + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_dedup_`\n" + + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_join_max_dedup_`\n" + "FROM `scott`.`DEPT`) `t`\n" - + "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; + + "WHERE `_row_number_join_max_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -1031,7 +1031,7 @@ public void testJoinWithCriteriaMaxGreaterThanZero() { + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + " LogicalFilter(condition=[<=($3, 1)])\n" + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]," - + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + + " _row_number_join_max_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n" + " LogicalTableScan(table=[[scott, DEPT]])\n"; verifyLogical(root, expectedLogical); verifyResultCount(root, 14); @@ -1043,9 +1043,9 @@ public void testJoinWithCriteriaMaxGreaterThanZero() { + "FROM `scott`.`EMP`\n" + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n" + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`" - + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_dedup_`\n" + + " ORDER BY `DEPTNO` NULLS LAST) `_row_number_join_max_dedup_`\n" + "FROM `scott`.`DEPT`) `t`\n" - + "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; + + "WHERE `_row_number_join_max_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); } From f348e99ca6b70869b9f69b6605cd55def085cdf1 Mon Sep 17 00:00:00 2001 From: Xinyuan Lu 
Date: Wed, 26 Nov 2025 16:13:13 +0800
Subject: [PATCH 81/99] fix clickbench query 43 (#4861)

* change q43

Signed-off-by: xinyual

* change plan after merging latest code

Signed-off-by: xinyual

---------

Signed-off-by: xinyual
---
 .../src/test/resources/clickbench/queries/q43.ppl | 9 ++++-----
 .../expectedOutput/calcite/clickbench/q43.yaml | 11 +++++------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/integ-test/src/test/resources/clickbench/queries/q43.ppl b/integ-test/src/test/resources/clickbench/queries/q43.ppl
index e52d429ba33..0025cd0102c 100644
--- a/integ-test/src/test/resources/clickbench/queries/q43.ppl
+++ b/integ-test/src/test/resources/clickbench/queries/q43.ppl
@@ -1,14 +1,13 @@
 /*
-SELECT DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') AS M, COUNT(*) AS PageViews
+SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews
 FROM hits
 WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0
-GROUP BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00')
-ORDER BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00')
+GROUP BY DATE_TRUNC('minute', EventTime)
+ORDER BY DATE_TRUNC('minute', EventTime)
 LIMIT 10 OFFSET 1000;
 */
 source=hits
 | where CounterID = 62 and EventDate >= '2013-07-01 00:00:00' and EventDate <= '2013-07-15 00:00:00' and IsRefresh = 0 and DontCountHits = 0
-| eval M = date_format(EventTime, '%Y-%m-%d %H:00:00')
-| stats bucket_nullable=false count() as PageViews by M
+| stats bucket_nullable=false count() as PageViews by span(EventTime, 1m) as M
 | sort M
 | head 10 from 1000
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
index b2a68c85d2f..3bba5e1ed82 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
@@ -4,13 +4,12 @@ calcite:
     LogicalSort(sort0=[$1], dir0=[ASC-nulls-first], offset=[1000], fetch=[10])
       LogicalProject(PageViews=[$1], M=[$0])
         LogicalAggregate(group=[{0}], PageViews=[COUNT()])
-          LogicalProject(M=[$111])
-            LogicalFilter(condition=[IS NOT NULL($111)])
-              LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], 
OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], M=[DATE_FORMAT($17, '%Y-%m-%d %H:00:00':VARCHAR)])
-                LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-15 00:00:00':VARCHAR)), =($72, 0), =($42, 0))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+          LogicalProject(M=[SPAN($17, 1, 'm')])
+            LogicalFilter(condition=[IS NOT NULL($17)])
+              LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-15 00:00:00':VARCHAR)), =($72, 0), =($42, 0))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
 physical: |
     EnumerableCalc(expr#0..1=[{inputs}], PageViews=[$t1], M=[$t0])
       EnumerableLimit(fetch=[10000])
        EnumerableLimit(offset=[1000], fetch=[10])
-          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCgHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJEQVRFX0ZPUk1BVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogLTEKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["EventTime","%Y-%m-%d %H:00:00"]}},"missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file From 16b0e9b30531cee6a8ff57b0426bad580ed783d7 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 27 Nov 2025 11:35:28 +0800 Subject: [PATCH 82/99] Remove count aggregation for sort on aggregate measure (#4867) Signed-off-by: Lantao Jin --- .../sql/calcite/big5/PPLBig5IT.java | 96 ++++++++++--------- .../{ => big5}/asc_sort_timestamp.yaml | 0 ...asc_sort_timestamp_can_match_shortcut.yaml | 0 ..._sort_timestamp_no_can_match_shortcut.yaml | 0 .../asc_sort_with_after_timestamp.yaml | 0 .../{ => 
big5}/cardinality_agg_high.yaml | 0 .../{ => big5}/cardinality_agg_high_2.yaml | 0 .../{ => big5}/cardinality_agg_low.yaml | 0 .../composite_date_histogram_daily.yaml | 0 .../calcite/{ => big5}/composite_terms.yaml | 0 .../{ => big5}/composite_terms_keyword.yaml | 0 .../{ => big5}/date_histogram_hourly_agg.yaml | 0 .../{ => big5}/date_histogram_minute_agg.yaml | 0 .../calcite/{ => big5}/default.yaml | 0 .../{ => big5}/desc_sort_timestamp.yaml | 0 ...esc_sort_timestamp_can_match_shortcut.yaml | 0 ..._sort_timestamp_no_can_match_shortcut.yaml | 0 .../desc_sort_with_after_timestamp.yaml | 0 .../calcite/{ => big5}/keyword_in_range.yaml | 0 .../calcite/{ => big5}/keyword_terms.yaml | 2 +- .../keyword_terms_low_cardinality.yaml | 2 +- .../{ => big5}/multi_terms_keyword.yaml | 2 +- .../{ => big5}/query_string_on_message.yaml | 0 .../query_string_on_message_filtered.yaml | 0 ...string_on_message_filtered_sorted_num.yaml | 0 .../calcite/{ => big5}/range.yaml | 0 .../calcite/{ => big5}/range_agg_1.yaml | 0 .../calcite/{ => big5}/range_agg_2.yaml | 0 .../{ => big5}/range_auto_date_histo.yaml | 0 .../range_auto_date_histo_with_metrics.yaml | 0 ..._conjunction_big_range_big_term_query.yaml | 0 ...onjunction_small_range_big_term_query.yaml | 0 ...junction_small_range_small_term_query.yaml | 0 ...isjunction_big_range_small_term_query.yaml | 0 .../calcite/{ => big5}/range_numeric.yaml | 0 .../{ => big5}/range_with_asc_sort.yaml | 0 .../{ => big5}/range_with_desc_sort.yaml | 0 .../calcite/{ => big5}/scroll.yaml | 0 .../sort_keyword_can_match_shortcut.yaml | 0 .../sort_keyword_no_can_match_shortcut.yaml | 0 .../calcite/{ => big5}/sort_numeric_asc.yaml | 0 .../sort_numeric_asc_with_match.yaml | 0 .../calcite/{ => big5}/sort_numeric_desc.yaml | 0 .../sort_numeric_desc_with_match.yaml | 0 .../calcite/{ => big5}/term.yaml | 0 .../{ => big5}/terms_significant_1.yaml | 0 .../{ => big5}/terms_significant_2.yaml | 0 .../calcite/clickbench/q13.yaml | 2 +- .../calcite/clickbench/q15.yaml | 2 +- .../calcite/clickbench/q16.yaml | 2 +- .../calcite/clickbench/q17.yaml | 2 +- .../calcite/clickbench/q19.yaml | 2 +- .../calcite/clickbench/q22.yaml | 2 +- .../calcite/clickbench/q34.yaml | 2 +- .../calcite/clickbench/q35.yaml | 2 +- .../calcite/clickbench/q36.yaml | 2 +- .../calcite/clickbench/q37.yaml | 2 +- .../calcite/clickbench/q38.yaml | 2 +- .../calcite/clickbench/q39.yaml | 2 +- .../calcite/clickbench/q40.yaml | 2 +- .../calcite/clickbench/q41.yaml | 2 +- .../calcite/clickbench/q42.yaml | 2 +- .../expectedOutput/calcite/clickbench/q8.yaml | 2 +- .../calcite/explain_agg_sort_on_measure1.yaml | 2 +- .../calcite/explain_agg_sort_on_measure3.yaml | 2 +- ...plain_agg_sort_on_measure_multi_terms.yaml | 2 +- ...gg_sort_on_measure_multi_terms_script.yaml | 2 +- .../explain_agg_sort_on_measure_script.yaml | 2 +- .../ppl/{ => big5}/asc_sort_timestamp.yaml | 0 ...asc_sort_timestamp_can_match_shortcut.yaml | 0 ..._sort_timestamp_no_can_match_shortcut.yaml | 0 .../asc_sort_with_after_timestamp.yaml | 0 .../ppl/{ => big5}/cardinality_agg_high.yaml | 0 .../{ => big5}/cardinality_agg_high_2.yaml | 0 .../ppl/{ => big5}/cardinality_agg_low.yaml | 0 .../composite_date_histogram_daily.yaml | 0 .../ppl/{ => big5}/composite_terms.yaml | 0 .../{ => big5}/composite_terms_keyword.yaml | 0 .../{ => big5}/date_histogram_hourly_agg.yaml | 0 .../{ => big5}/date_histogram_minute_agg.yaml | 0 .../ppl/{ => big5}/default.yaml | 0 .../ppl/{ => big5}/desc_sort_timestamp.yaml | 0 ...esc_sort_timestamp_can_match_shortcut.yaml | 0 
..._sort_timestamp_no_can_match_shortcut.yaml | 0 .../desc_sort_with_after_timestamp.yaml | 0 .../ppl/{ => big5}/keyword_in_range.yaml | 0 .../ppl/{ => big5}/keyword_terms.yaml | 0 .../keyword_terms_low_cardinality.yaml | 0 .../ppl/{ => big5}/multi_terms_keyword.yaml | 0 .../{ => big5}/query_string_on_message.yaml | 0 .../query_string_on_message_filtered.yaml | 0 ...string_on_message_filtered_sorted_num.yaml | 0 .../expectedOutput/ppl/{ => big5}/range.yaml | 0 .../ppl/{ => big5}/range_agg_1.yaml | 0 .../ppl/{ => big5}/range_agg_2.yaml | 0 .../ppl/{ => big5}/range_auto_date_histo.yaml | 0 .../range_auto_date_histo_with_metrics.yaml | 0 ..._conjunction_big_range_big_term_query.yaml | 0 ...onjunction_small_range_big_term_query.yaml | 0 ...junction_small_range_small_term_query.yaml | 0 ...isjunction_big_range_small_term_query.yaml | 0 .../ppl/{ => big5}/range_numeric.yaml | 0 .../ppl/{ => big5}/range_with_asc_sort.yaml | 0 .../ppl/{ => big5}/range_with_desc_sort.yaml | 0 .../expectedOutput/ppl/{ => big5}/scroll.yaml | 0 .../sort_keyword_can_match_shortcut.yaml | 0 .../sort_keyword_no_can_match_shortcut.yaml | 0 .../ppl/{ => big5}/sort_numeric_asc.yaml | 0 .../sort_numeric_asc_with_match.yaml | 0 .../ppl/{ => big5}/sort_numeric_desc.yaml | 0 .../sort_numeric_desc_with_match.yaml | 0 .../expectedOutput/ppl/{ => big5}/term.yaml | 0 .../ppl/{ => big5}/terms_significant_1.yaml | 0 .../ppl/{ => big5}/terms_significant_2.yaml | 0 .../scan/context/AggPushDownAction.java | 15 ++- 115 files changed, 81 insertions(+), 78 deletions(-) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/asc_sort_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/asc_sort_timestamp_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/asc_sort_timestamp_no_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/asc_sort_with_after_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/cardinality_agg_high.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/cardinality_agg_high_2.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/cardinality_agg_low.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/composite_date_histogram_daily.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/composite_terms.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/composite_terms_keyword.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/date_histogram_hourly_agg.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/date_histogram_minute_agg.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/default.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/desc_sort_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/desc_sort_timestamp_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/desc_sort_timestamp_no_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/desc_sort_with_after_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/keyword_in_range.yaml (100%) rename 
integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/keyword_terms.yaml (85%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/keyword_terms_low_cardinality.yaml (85%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/multi_terms_keyword.yaml (90%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/query_string_on_message.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/query_string_on_message_filtered.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/query_string_on_message_filtered_sorted_num.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_agg_1.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_agg_2.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_auto_date_histo.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_auto_date_histo_with_metrics.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_field_conjunction_big_range_big_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_field_conjunction_small_range_big_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_field_conjunction_small_range_small_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_field_disjunction_big_range_small_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_numeric.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_with_asc_sort.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/range_with_desc_sort.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/scroll.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/sort_keyword_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/sort_keyword_no_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/sort_numeric_asc.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/sort_numeric_asc_with_match.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/sort_numeric_desc.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/sort_numeric_desc_with_match.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/term.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/terms_significant_1.yaml (100%) rename integ-test/src/test/resources/expectedOutput/calcite/{ => big5}/terms_significant_2.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/asc_sort_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/asc_sort_timestamp_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/asc_sort_timestamp_no_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/asc_sort_with_after_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => 
big5}/cardinality_agg_high.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/cardinality_agg_high_2.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/cardinality_agg_low.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/composite_date_histogram_daily.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/composite_terms.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/composite_terms_keyword.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/date_histogram_hourly_agg.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/date_histogram_minute_agg.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/default.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/desc_sort_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/desc_sort_timestamp_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/desc_sort_timestamp_no_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/desc_sort_with_after_timestamp.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/keyword_in_range.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/keyword_terms.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/keyword_terms_low_cardinality.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/multi_terms_keyword.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/query_string_on_message.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/query_string_on_message_filtered.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/query_string_on_message_filtered_sorted_num.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_agg_1.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_agg_2.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_auto_date_histo.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_auto_date_histo_with_metrics.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_field_conjunction_big_range_big_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_field_conjunction_small_range_big_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_field_conjunction_small_range_small_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_field_disjunction_big_range_small_term_query.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_numeric.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_with_asc_sort.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/range_with_desc_sort.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/scroll.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/sort_keyword_can_match_shortcut.yaml (100%) rename 
integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/sort_keyword_no_can_match_shortcut.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/sort_numeric_asc.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/sort_numeric_asc_with_match.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/sort_numeric_desc.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/sort_numeric_desc_with_match.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/term.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/terms_significant_1.yaml (100%) rename integ-test/src/test/resources/expectedOutput/ppl/{ => big5}/terms_significant_2.yaml (100%) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java index 4997d361203..2c2b91411c1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/big5/PPLBig5IT.java @@ -53,7 +53,7 @@ public static void reset() throws IOException { public void asc_sort_timestamp() throws IOException { String ppl = sanitize(loadExpectedQuery("asc_sort_timestamp.ppl")); timing(summary, "asc_sort_timestamp", ppl); - String expected = loadExpectedPlan("asc_sort_timestamp.yaml"); + String expected = loadExpectedPlan("big5/asc_sort_timestamp.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -61,7 +61,7 @@ public void asc_sort_timestamp() throws IOException { public void asc_sort_timestamp_can_match_shortcut() throws IOException { String ppl = sanitize(loadExpectedQuery("asc_sort_timestamp_can_match_shortcut.ppl")); timing(summary, "asc_sort_timestamp_can_match_shortcut", ppl); - String expected = loadExpectedPlan("asc_sort_timestamp_can_match_shortcut.yaml"); + String expected = loadExpectedPlan("big5/asc_sort_timestamp_can_match_shortcut.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -69,7 +69,7 @@ public void asc_sort_timestamp_can_match_shortcut() throws IOException { public void asc_sort_timestamp_no_can_match_shortcut() throws IOException { String ppl = sanitize(loadExpectedQuery("asc_sort_timestamp_no_can_match_shortcut.ppl")); timing(summary, "asc_sort_timestamp_no_can_match_shortcut", ppl); - String expected = loadExpectedPlan("asc_sort_timestamp_no_can_match_shortcut.yaml"); + String expected = loadExpectedPlan("big5/asc_sort_timestamp_no_can_match_shortcut.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -77,7 +77,7 @@ public void asc_sort_timestamp_no_can_match_shortcut() throws IOException { public void asc_sort_with_after_timestamp() throws IOException { String ppl = sanitize(loadExpectedQuery("asc_sort_with_after_timestamp.ppl")); timing(summary, "asc_sort_with_after_timestamp", ppl); - String expected = loadExpectedPlan("asc_sort_with_after_timestamp.yaml"); + String expected = loadExpectedPlan("big5/asc_sort_with_after_timestamp.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -85,7 +85,7 @@ public void asc_sort_with_after_timestamp() throws IOException { public void composite_date_histogram_daily() throws IOException { String ppl = sanitize(loadExpectedQuery("composite_date_histogram_daily.ppl")); timing(summary, "composite_date_histogram_daily", ppl); - String expected = loadExpectedPlan("composite_date_histogram_daily.yaml"); + String 
expected = loadExpectedPlan("big5/composite_date_histogram_daily.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -93,7 +93,7 @@ public void composite_date_histogram_daily() throws IOException { public void composite_terms_keyword() throws IOException { String ppl = sanitize(loadExpectedQuery("composite_terms_keyword.ppl")); timing(summary, "composite_terms_keyword", ppl); - String expected = loadExpectedPlan("composite_terms_keyword.yaml"); + String expected = loadExpectedPlan("big5/composite_terms_keyword.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -101,7 +101,7 @@ public void composite_terms_keyword() throws IOException { public void composite_terms() throws IOException { String ppl = sanitize(loadExpectedQuery("composite_terms.ppl")); timing(summary, "composite_terms", ppl); - String expected = loadExpectedPlan("composite_terms.yaml"); + String expected = loadExpectedPlan("big5/composite_terms.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -109,7 +109,7 @@ public void composite_terms() throws IOException { public void date_histogram_hourly_agg() throws IOException { String ppl = sanitize(loadExpectedQuery("date_histogram_hourly_agg.ppl")); timing(summary, "date_histogram_hourly_agg", ppl); - String expected = loadExpectedPlan("date_histogram_hourly_agg.yaml"); + String expected = loadExpectedPlan("big5/date_histogram_hourly_agg.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -117,7 +117,7 @@ public void date_histogram_hourly_agg() throws IOException { public void date_histogram_minute_agg() throws IOException { String ppl = sanitize(loadExpectedQuery("date_histogram_minute_agg.ppl")); timing(summary, "date_histogram_minute_agg", ppl); - String expected = loadExpectedPlan("date_histogram_minute_agg.yaml"); + String expected = loadExpectedPlan("big5/date_histogram_minute_agg.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -125,7 +125,7 @@ public void date_histogram_minute_agg() throws IOException { public void test_default() throws IOException { String ppl = sanitize(loadExpectedQuery("default.ppl")); timing(summary, "default", ppl); - String expected = loadExpectedPlan("default.yaml"); + String expected = loadExpectedPlan("big5/default.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -133,7 +133,7 @@ public void test_default() throws IOException { public void desc_sort_timestamp() throws IOException { String ppl = sanitize(loadExpectedQuery("desc_sort_timestamp.ppl")); timing(summary, "desc_sort_timestamp", ppl); - String expected = loadExpectedPlan("desc_sort_timestamp.yaml"); + String expected = loadExpectedPlan("big5/desc_sort_timestamp.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -141,7 +141,7 @@ public void desc_sort_timestamp() throws IOException { public void desc_sort_timestamp_can_match_shortcut() throws IOException { String ppl = sanitize(loadExpectedQuery("desc_sort_timestamp_can_match_shortcut.ppl")); timing(summary, "desc_sort_timestamp_can_match_shortcut", ppl); - String expected = loadExpectedPlan("desc_sort_timestamp_can_match_shortcut.yaml"); + String expected = loadExpectedPlan("big5/desc_sort_timestamp_can_match_shortcut.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -149,7 +149,7 @@ public void desc_sort_timestamp_can_match_shortcut() throws IOException { public void desc_sort_timestamp_no_can_match_shortcut() throws IOException { String ppl = 
sanitize(loadExpectedQuery("desc_sort_timestamp_no_can_match_shortcut.ppl")); timing(summary, "desc_sort_timestamp_no_can_match_shortcut", ppl); - String expected = loadExpectedPlan("desc_sort_timestamp_no_can_match_shortcut.yaml"); + String expected = loadExpectedPlan("big5/desc_sort_timestamp_no_can_match_shortcut.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -157,7 +157,7 @@ public void desc_sort_timestamp_no_can_match_shortcut() throws IOException { public void desc_sort_with_after_timestamp() throws IOException { String ppl = sanitize(loadExpectedQuery("desc_sort_with_after_timestamp.ppl")); timing(summary, "desc_sort_with_after_timestamp", ppl); - String expected = loadExpectedPlan("desc_sort_with_after_timestamp.yaml"); + String expected = loadExpectedPlan("big5/desc_sort_with_after_timestamp.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -165,7 +165,7 @@ public void desc_sort_with_after_timestamp() throws IOException { public void keyword_in_range() throws IOException { String ppl = sanitize(loadExpectedQuery("keyword_in_range.ppl")); timing(summary, "keyword_in_range", ppl); - String expected = loadExpectedPlan("keyword_in_range.yaml"); + String expected = loadExpectedPlan("big5/keyword_in_range.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -173,7 +173,7 @@ public void keyword_in_range() throws IOException { public void keyword_terms() throws IOException { String ppl = sanitize(loadExpectedQuery("keyword_terms.ppl")); timing(summary, "keyword_terms", ppl); - String expected = loadExpectedPlan("keyword_terms.yaml"); + String expected = loadExpectedPlan("big5/keyword_terms.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -181,7 +181,7 @@ public void keyword_terms() throws IOException { public void keyword_terms_low_cardinality() throws IOException { String ppl = sanitize(loadExpectedQuery("keyword_terms_low_cardinality.ppl")); timing(summary, "keyword_terms_low_cardinality", ppl); - String expected = loadExpectedPlan("keyword_terms_low_cardinality.yaml"); + String expected = loadExpectedPlan("big5/keyword_terms_low_cardinality.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -189,7 +189,7 @@ public void keyword_terms_low_cardinality() throws IOException { public void multi_terms_keyword() throws IOException { String ppl = sanitize(loadExpectedQuery("multi_terms_keyword.ppl")); timing(summary, "multi_terms_keyword", ppl); - String expected = loadExpectedPlan("multi_terms_keyword.yaml"); + String expected = loadExpectedPlan("big5/multi_terms_keyword.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -197,7 +197,7 @@ public void multi_terms_keyword() throws IOException { public void query_string_on_message() throws IOException { String ppl = sanitize(loadExpectedQuery("query_string_on_message.ppl")); timing(summary, "query_string_on_message", ppl); - String expected = loadExpectedPlan("query_string_on_message.yaml"); + String expected = loadExpectedPlan("big5/query_string_on_message.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -205,7 +205,7 @@ public void query_string_on_message() throws IOException { public void query_string_on_message_filtered() throws IOException { String ppl = sanitize(loadExpectedQuery("query_string_on_message_filtered.ppl")); timing(summary, "query_string_on_message_filtered", ppl); - String expected = loadExpectedPlan("query_string_on_message_filtered.yaml"); + String expected = 
loadExpectedPlan("big5/query_string_on_message_filtered.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -213,7 +213,7 @@ public void query_string_on_message_filtered() throws IOException { public void query_string_on_message_filtered_sorted_num() throws IOException { String ppl = sanitize(loadExpectedQuery("query_string_on_message_filtered_sorted_num.ppl")); timing(summary, "query_string_on_message_filtered_sorted_num", ppl); - String expected = loadExpectedPlan("query_string_on_message_filtered_sorted_num.yaml"); + String expected = loadExpectedPlan("big5/query_string_on_message_filtered_sorted_num.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -221,7 +221,7 @@ public void query_string_on_message_filtered_sorted_num() throws IOException { public void range() throws IOException { String ppl = sanitize(loadExpectedQuery("range.ppl")); timing(summary, "range", ppl); - String expected = loadExpectedPlan("range.yaml"); + String expected = loadExpectedPlan("big5/range.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -229,7 +229,7 @@ public void range() throws IOException { public void range_auto_date_histo() throws IOException { String ppl = sanitize(loadExpectedQuery("range_auto_date_histo.ppl")); timing(summary, "range_auto_date_histo", ppl); - String expected = loadExpectedPlan("range_auto_date_histo.yaml"); + String expected = loadExpectedPlan("big5/range_auto_date_histo.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -237,7 +237,7 @@ public void range_auto_date_histo() throws IOException { public void range_auto_date_histo_with_metrics() throws IOException { String ppl = sanitize(loadExpectedQuery("range_auto_date_histo_with_metrics.ppl")); timing(summary, "range_auto_date_histo_with_metrics", ppl); - String expected = loadExpectedPlan("range_auto_date_histo_with_metrics.yaml"); + String expected = loadExpectedPlan("big5/range_auto_date_histo_with_metrics.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -245,7 +245,7 @@ public void range_auto_date_histo_with_metrics() throws IOException { public void range_numeric() throws IOException { String ppl = sanitize(loadExpectedQuery("range_numeric.ppl")); timing(summary, "range_numeric", ppl); - String expected = loadExpectedPlan("range_numeric.yaml"); + String expected = loadExpectedPlan("big5/range_numeric.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -254,7 +254,8 @@ public void range_field_conjunction_big_range_big_term_query() throws IOExceptio String ppl = sanitize(loadExpectedQuery("range_field_conjunction_big_range_big_term_query.ppl")); timing(summary, "range_field_conjunction_big_range_big_term_query", ppl); - String expected = loadExpectedPlan("range_field_conjunction_big_range_big_term_query.yaml"); + String expected = + loadExpectedPlan("big5/range_field_conjunction_big_range_big_term_query.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -263,7 +264,8 @@ public void range_field_conjunction_small_range_big_term_query() throws IOExcept String ppl = sanitize(loadExpectedQuery("range_field_conjunction_small_range_big_term_query.ppl")); timing(summary, "range_field_conjunction_small_range_big_term_query", ppl); - String expected = loadExpectedPlan("range_field_conjunction_small_range_big_term_query.yaml"); + String expected = + loadExpectedPlan("big5/range_field_conjunction_small_range_big_term_query.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ 
-272,7 +274,8 @@ public void range_field_conjunction_small_range_small_term_query() throws IOExce String ppl = sanitize(loadExpectedQuery("range_field_conjunction_small_range_small_term_query.ppl")); timing(summary, "range_field_conjunction_small_range_small_term_query", ppl); - String expected = loadExpectedPlan("range_field_conjunction_small_range_small_term_query.yaml"); + String expected = + loadExpectedPlan("big5/range_field_conjunction_small_range_small_term_query.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -281,7 +284,8 @@ public void range_field_disjunction_big_range_small_term_query() throws IOExcept String ppl = sanitize(loadExpectedQuery("range_field_disjunction_big_range_small_term_query.ppl")); timing(summary, "range_field_disjunction_big_range_small_term_query", ppl); - String expected = loadExpectedPlan("range_field_disjunction_big_range_small_term_query.yaml"); + String expected = + loadExpectedPlan("big5/range_field_disjunction_big_range_small_term_query.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -289,7 +293,7 @@ public void range_field_disjunction_big_range_small_term_query() throws IOExcept public void range_with_asc_sort() throws IOException { String ppl = sanitize(loadExpectedQuery("range_with_asc_sort.ppl")); timing(summary, "range_with_asc_sort", ppl); - String expected = loadExpectedPlan("range_with_asc_sort.yaml"); + String expected = loadExpectedPlan("big5/range_with_asc_sort.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -297,7 +301,7 @@ public void range_with_asc_sort() throws IOException { public void range_with_desc_sort() throws IOException { String ppl = sanitize(loadExpectedQuery("range_with_desc_sort.ppl")); timing(summary, "range_with_desc_sort", ppl); - String expected = loadExpectedPlan("range_with_desc_sort.yaml"); + String expected = loadExpectedPlan("big5/range_with_desc_sort.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -305,7 +309,7 @@ public void range_with_desc_sort() throws IOException { public void scroll() throws IOException { String ppl = sanitize(loadExpectedQuery("scroll.ppl")); timing(summary, "scroll", ppl); - String expected = loadExpectedPlan("scroll.yaml"); + String expected = loadExpectedPlan("big5/scroll.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -313,7 +317,7 @@ public void scroll() throws IOException { public void sort_keyword_can_match_shortcut() throws IOException { String ppl = sanitize(loadExpectedQuery("sort_keyword_can_match_shortcut.ppl")); timing(summary, "sort_keyword_can_match_shortcut", ppl); - String expected = loadExpectedPlan("sort_keyword_can_match_shortcut.yaml"); + String expected = loadExpectedPlan("big5/sort_keyword_can_match_shortcut.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -321,7 +325,7 @@ public void sort_keyword_can_match_shortcut() throws IOException { public void sort_keyword_no_can_match_shortcut() throws IOException { String ppl = sanitize(loadExpectedQuery("sort_keyword_no_can_match_shortcut.ppl")); timing(summary, "sort_keyword_no_can_match_shortcut", ppl); - String expected = loadExpectedPlan("sort_keyword_no_can_match_shortcut.yaml"); + String expected = loadExpectedPlan("big5/sort_keyword_no_can_match_shortcut.yaml"); assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); } @@ -329,7 +333,7 @@ public void sort_keyword_no_can_match_shortcut() throws IOException { public void sort_numeric_asc() throws IOException { String ppl = 
         sanitize(loadExpectedQuery("sort_numeric_asc.ppl"));
     timing(summary, "sort_numeric_asc", ppl);
-    String expected = loadExpectedPlan("sort_numeric_asc.yaml");
+    String expected = loadExpectedPlan("big5/sort_numeric_asc.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -337,7 +341,7 @@ public void sort_numeric_asc() throws IOException {
   public void sort_numeric_asc_with_match() throws IOException {
     String ppl = sanitize(loadExpectedQuery("sort_numeric_asc_with_match.ppl"));
     timing(summary, "sort_numeric_asc_with_match", ppl);
-    String expected = loadExpectedPlan("sort_numeric_asc_with_match.yaml");
+    String expected = loadExpectedPlan("big5/sort_numeric_asc_with_match.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -345,7 +349,7 @@ public void sort_numeric_asc_with_match() throws IOException {
   public void sort_numeric_desc() throws IOException {
     String ppl = sanitize(loadExpectedQuery("sort_numeric_desc.ppl"));
     timing(summary, "sort_numeric_desc", ppl);
-    String expected = loadExpectedPlan("sort_numeric_desc.yaml");
+    String expected = loadExpectedPlan("big5/sort_numeric_desc.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -353,7 +357,7 @@ public void sort_numeric_desc() throws IOException {
   public void sort_numeric_desc_with_match() throws IOException {
     String ppl = sanitize(loadExpectedQuery("sort_numeric_desc_with_match.ppl"));
     timing(summary, "sort_numeric_desc_with_match", ppl);
-    String expected = loadExpectedPlan("sort_numeric_desc_with_match.yaml");
+    String expected = loadExpectedPlan("big5/sort_numeric_desc_with_match.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -361,7 +365,7 @@ public void sort_numeric_desc_with_match() throws IOException {
   public void term() throws IOException {
     String ppl = sanitize(loadExpectedQuery("term.ppl"));
     timing(summary, "term", ppl);
-    String expected = loadExpectedPlan("term.yaml");
+    String expected = loadExpectedPlan("big5/term.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -369,7 +373,7 @@ public void term() throws IOException {
   public void terms_significant_1() throws IOException {
     String ppl = sanitize(loadExpectedQuery("terms_significant_1.ppl"));
     timing(summary, "terms_significant_1", ppl);
-    String expected = loadExpectedPlan("terms_significant_1.yaml");
+    String expected = loadExpectedPlan("big5/terms_significant_1.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -377,7 +381,7 @@ public void terms_significant_1() throws IOException {
   public void terms_significant_2() throws IOException {
     String ppl = sanitize(loadExpectedQuery("terms_significant_2.ppl"));
     timing(summary, "terms_significant_2", ppl);
-    String expected = loadExpectedPlan("terms_significant_2.yaml");
+    String expected = loadExpectedPlan("big5/terms_significant_2.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -385,7 +389,7 @@ public void terms_significant_2() throws IOException {
   public void range_agg_1() throws IOException {
     String ppl = sanitize(loadExpectedQuery("range_agg_1.ppl"));
     timing(summary, "range_agg_1", ppl);
-    String expected = loadExpectedPlan("range_agg_1.yaml");
+    String expected = loadExpectedPlan("big5/range_agg_1.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -393,7 +397,7 @@ public void range_agg_1() throws IOException {
   public void range_agg_2() throws IOException {
     String ppl = sanitize(loadExpectedQuery("range_agg_2.ppl"));
     timing(summary, "range_agg_2", ppl);
-    String expected = loadExpectedPlan("range_agg_2.yaml");
+    String expected = loadExpectedPlan("big5/range_agg_2.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -401,7 +405,7 @@ public void range_agg_2() throws IOException {
   public void cardinality_agg_high() throws IOException {
     String ppl = sanitize(loadExpectedQuery("cardinality_agg_high.ppl"));
     timing(summary, "cardinality_agg_high", ppl);
-    String expected = loadExpectedPlan("cardinality_agg_high.yaml");
+    String expected = loadExpectedPlan("big5/cardinality_agg_high.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -409,7 +413,7 @@ public void cardinality_agg_high() throws IOException {
   public void cardinality_agg_high_2() throws IOException {
     String ppl = sanitize(loadExpectedQuery("cardinality_agg_high_2.ppl"));
     timing(summary, "cardinality_agg_high_2", ppl);
-    String expected = loadExpectedPlan("cardinality_agg_high_2.yaml");
+    String expected = loadExpectedPlan("big5/cardinality_agg_high_2.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
@@ -417,7 +421,7 @@ public void cardinality_agg_high_2() throws IOException {
   public void cardinality_agg_low() throws IOException {
     String ppl = sanitize(loadExpectedQuery("cardinality_agg_low.ppl"));
     timing(summary, "cardinality_agg_low", ppl);
-    String expected = loadExpectedPlan("cardinality_agg_low.yaml");
+    String expected = loadExpectedPlan("big5/cardinality_agg_low.yaml");
     assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
   }
 
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/asc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/asc_sort_timestamp.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/asc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/asc_sort_timestamp_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/asc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/asc_sort_timestamp_no_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_timestamp_no_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/asc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_with_after_timestamp.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/asc_sort_with_after_timestamp.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/asc_sort_with_after_timestamp.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/cardinality_agg_high.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/cardinality_agg_high.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/cardinality_agg_high.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/cardinality_agg_high.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/cardinality_agg_high_2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/cardinality_agg_high_2.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/cardinality_agg_high_2.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/cardinality_agg_high_2.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/cardinality_agg_low.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/cardinality_agg_low.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/cardinality_agg_low.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/cardinality_agg_low.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/composite_date_histogram_daily.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/composite_terms.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/composite_terms_keyword.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/date_histogram_hourly_agg.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/date_histogram_minute_agg.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/default.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/default.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/default.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/desc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/desc_sort_timestamp.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/desc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/desc_sort_timestamp_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/desc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/desc_sort_timestamp_no_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_timestamp_no_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/desc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_with_after_timestamp.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/desc_sort_with_after_timestamp.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/desc_sort_with_after_timestamp.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/keyword_in_range.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/keyword_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml
similarity index 85%
rename from integ-test/src/test/resources/expectedOutput/calcite/keyword_terms.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml
index da777dc2784..30eb5cfddab 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/keyword_terms.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[IS NOT NULL($34)])
       CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), PROJECT->[station, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->500, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"station":"desc"},{"_key":"asc"}]},"aggregations":{"station":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},station=COUNT()), PROJECT->[station, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->500, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":500,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/keyword_terms_low_cardinality.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml
similarity index 85%
rename from integ-test/src/test/resources/expectedOutput/calcite/keyword_terms_low_cardinality.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml
index fd4f1b547e3..ddb924bedf5 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/keyword_terms_low_cardinality.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_terms_low_cardinality.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[IS NOT NULL($34)])
       CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), PROJECT->[country, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->50, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"country":"desc"},{"_key":"asc"}]},"aggregations":{"country":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},country=COUNT()), PROJECT->[country, aws.cloudwatch.log_stream], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->50, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","size":50,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml
similarity index 90%
rename from integ-test/src/test/resources/expectedOutput/calcite/multi_terms_keyword.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml
index 4b52e71862f..89708ca4d4b 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/multi_terms_keyword.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml
@@ -9,4 +9,4 @@ calcite:
     LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))])
       CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/query_string_on_message.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/query_string_on_message.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/query_string_on_message_filtered.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/query_string_on_message_filtered.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/query_string_on_message_filtered_sorted_num.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/query_string_on_message_filtered_sorted_num.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/query_string_on_message_filtered_sorted_num.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_agg_1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_1.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_agg_1.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_1.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_agg_2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_2.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_agg_2.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_agg_2.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_auto_date_histo.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_auto_date_histo.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_auto_date_histo_with_metrics.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo_with_metrics.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_auto_date_histo_with_metrics.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_auto_date_histo_with_metrics.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_field_conjunction_big_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_field_conjunction_big_range_big_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_big_range_big_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_field_conjunction_small_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_big_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_field_conjunction_small_range_big_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_big_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_field_conjunction_small_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_small_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_field_conjunction_small_range_small_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_conjunction_small_range_small_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_field_disjunction_big_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_disjunction_big_range_small_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_field_disjunction_big_range_small_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_field_disjunction_big_range_small_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_numeric.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_numeric.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_numeric.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_with_asc_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_with_asc_sort.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_asc_sort.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/range_with_desc_sort.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/range_with_desc_sort.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/range_with_desc_sort.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/scroll.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/scroll.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/scroll.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/scroll.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/sort_keyword_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/sort_keyword_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/sort_keyword_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/sort_keyword_no_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/sort_keyword_no_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_asc.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_asc.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_asc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_asc_with_match.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_asc_with_match.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_desc.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_desc.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_desc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/sort_numeric_desc_with_match.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/sort_numeric_desc_with_match.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/term.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/term.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/terms_significant_1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/terms_significant_1.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/terms_significant_2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/calcite/terms_significant_2.yaml
rename to integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml
index b18a08c410a..8c7797a4ad7 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml
@@ -9,4 +9,4 @@ calcite:
     LogicalFilter(condition=[<>($63, '')])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($63, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($63, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml
index 2b73585ff73..3b0000ec80a 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml
@@ -9,4 +9,4 @@ calcite:
     LogicalFilter(condition=[<>($63, '')])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID"],"excludes":[]},"aggregations":{"SearchEngineID|SearchPhrase":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID"],"excludes":[]},"aggregations":{"SearchEngineID|SearchPhrase":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml
index aad05d10c58..4110e1a12d0 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q16.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[IS NOT NULL($84)])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), UserID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), UserID], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID":{"terms":{"field":"UserID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml
index 2f2b2f0a897..46855028754 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q17.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml
index 54d8b79cbc3..95e42c3147e 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q19.yaml
@@ -9,4 +9,4 @@ calcite:
     LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110], m=[EXTRACT('minute':VARCHAR, $17)])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), UserID, m, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), UserID, m, SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"UserID|m|SearchPhrase":{"multi_terms":{"terms":[{"field":"UserID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCZnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJFWFRSQUNUIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAibnVsbGFibGUiOiB0cnVlCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0],"DIGESTS":["minute","EventTime"]}},"value_type":"long"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml
index c40758c1b3d..513568e063b 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml
@@ -9,4 +9,4 @@ calcite:
     LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase], FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase], FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml
index 220e94f3bbb..97f48a87f28 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q34.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[IS NOT NULL($26)])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, URL], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q35.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q35.yaml
index da70cfee61a..439c445af4b 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q35.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q35.yaml
@@ -10,4 +10,4 @@ calcite:
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], c=[$t1], const=[$t2], URL=[$t0])
-      CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+      CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q36.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q36.yaml
index 4f05d895b3a..55560215771 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q36.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q36.yaml
@@ -10,4 +10,4 @@ calcite:
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[-($t0, $t2)], expr#4=[2], expr#5=[-($t0, $t4)], expr#6=[3], expr#7=[-($t0, $t6)], c=[$t1], ClientIP=[$t0], ClientIP - 1=[$t3], ClientIP - 2=[$t5], ClientIP - 3=[$t7])
-      CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->IS NOT NULL($76), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"ClientIP","boost":1.0}},"aggregations":{"ClientIP":{"terms":{"field":"ClientIP","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+      CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->IS NOT NULL($76), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"ClientIP","boost":1.0}},"aggregations":{"ClientIP":{"terms":{"field":"ClientIP","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml
index 71446de8af9..ffe16fc8444 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml
@@ -9,4 +9,4 @@ calcite:
     LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml
index f41ff988614..97b0fd3db2f 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml
@@ -9,4 +9,4 @@ calcite:
     LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, DontCountHits, IsRefresh, Title, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","DontCountHits","IsRefresh","Title","CounterID"],"excludes":[]},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, DontCountHits, IsRefresh, Title, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","DontCountHits","IsRefresh","Title","CounterID"],"excludes":[]},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml
index f2fbd2057cb..8b26cbbb03b 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml
@@ -11,4 +11,4 @@ calcite:
   physical: |
     EnumerableLimit(fetch=[10000])
       EnumerableLimit(offset=[1000], fetch=[10])
-        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, IsDownload, IsLink, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","IsDownload","IsLink","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, IsDownload, IsLink, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","IsDownload","IsLink","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml
index b2ae45002ed..b7ae1f5e396 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q40.yaml
@@ -13,4 +13,4 @@ calcite:
     EnumerableCalc(expr#0..5=[{inputs}], PageViews=[$t5], TraficSourceID=[$t0], SearchEngineID=[$t1], AdvEngineID=[$t2], Src=[$t3], Dst=[$t4])
       EnumerableLimit(fetch=[10000])
        EnumerableLimit(offset=[1000], fetch=[10])
-          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"TraficSourceID|SearchEngineID|AdvEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"SearchEngineID"},{"field":"AdvEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGCnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAib3AiOiB7CiAgICAgICAgICAgICJuYW1lIjogIj0iLAogICAgICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgICAgIH0sCiAgICAgICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlNNQUxMSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, TraficSourceID, AdvEngineID, URL, SearchEngineID, IsRefresh, Referer, CounterID], FILTER->AND(=($7, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($5, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2, 3, 4},PageViews=COUNT()), SORT_AGG_METRICS->[5 DESC LAST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","TraficSourceID","AdvEngineID","URL","SearchEngineID","IsRefresh","Referer","CounterID"],"excludes":[]},"aggregations":{"TraficSourceID|SearchEngineID|AdvEngineID|Src|Dst":{"multi_terms":{"terms":[{"field":"TraficSourceID"},{"field":"SearchEngineID"},{"field":"AdvEngineID"},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGCnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiQU5EIiwKICAgICAgICAia2luZCI6ICJBTkQiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgIm9wIjogewogICAgICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAgICAgImtpbmQiOiAiRVFVQUxTIiwKICAgICAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICBdCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAib3AiOiB7CiAgICAgICAgICAgICJuYW1lIjogIj0iLAogICAgICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgICAgIH0sCiAgICAgICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlNNQUxMSU5UIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogNSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,0,2],"DIGESTS":["SearchEngineID",0,"AdvEngineID",0,"Referer",""]}}},{"field":"URL"}],"size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml
index 9bb4ac10e86..8c8a2359f51 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml
@@ -11,4 +11,4 @@ calcite:
   physical: |
     EnumerableLimit(fetch=[10000])
       EnumerableLimit(offset=[100], fetch=[10])
-        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml
index 
9f785bf7cc9..2f5e72fc1a0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10000], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"PageViews":"desc"},{"_key":"asc"}]},"aggregations":{"PageViews":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml 
b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml index f57500d3809..8ebc6067a0a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[<>($19, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, hits]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($19, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), AdvEngineID], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"desc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($19, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), AdvEngineID], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml index b837e4968d4..75389120405 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure1.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), state], 
LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"state":{"terms":{"field":"state.keyword","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml index 44393bfce83..e60bbe90fdc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure3.yaml @@ -9,4 +9,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[cnt, span(birthdate,1d)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"cnt":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0},"aggregations":{"cnt":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[cnt, span(birthdate,1d)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","fixed_interval":"1d","offset":0,"order":[{"_count":"desc"},{"_key":"asc"}],"keyed":false,"min_doc_count":0}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml index 6bde95ecf0c..4caf7759fc6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), gender, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of 
file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), gender, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"gender|state":{"multi_terms":{"terms":[{"field":"gender.keyword"},{"field":"state.keyword"}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml index ddbe8b2764e..13d8350c11f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_multi_terms_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_gender=[LOWER($4)], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], PROJECT->[count(), new_gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 ASC FIRST], 
PROJECT->[count(), new_gender, new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_gender|new_state":{"multi_terms":{"terms":[{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}}},{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}}}],"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml index 9292b81e5be..7e010cba2ad 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_sort_on_measure_script.yaml @@ -9,4 +9,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], new_state=[LOWER($7)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"count()":"asc"},{"_key":"asc"}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 ASC FIRST], PROJECT->[count(), new_state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"asc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/asc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml similarity index 100% rename from integ-test/src/test/resources/expectedOutput/ppl/asc_sort_timestamp.yaml rename to integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml diff --git a/integ-test/src/test/resources/expectedOutput/ppl/asc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml similarity index 100% rename from integ-test/src/test/resources/expectedOutput/ppl/asc_sort_timestamp_can_match_shortcut.yaml rename to integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml diff --git a/integ-test/src/test/resources/expectedOutput/ppl/asc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml similarity index 100% rename from integ-test/src/test/resources/expectedOutput/ppl/asc_sort_timestamp_no_can_match_shortcut.yaml rename to integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml diff --git a/integ-test/src/test/resources/expectedOutput/ppl/asc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml similarity index 100% rename from integ-test/src/test/resources/expectedOutput/ppl/asc_sort_with_after_timestamp.yaml rename to integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml diff --git a/integ-test/src/test/resources/expectedOutput/ppl/cardinality_agg_high.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high.yaml similarity index 100% rename from integ-test/src/test/resources/expectedOutput/ppl/cardinality_agg_high.yaml rename to integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high.yaml diff --git a/integ-test/src/test/resources/expectedOutput/ppl/cardinality_agg_high_2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high_2.yaml similarity index 100% rename from integ-test/src/test/resources/expectedOutput/ppl/cardinality_agg_high_2.yaml rename to integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high_2.yaml diff --git a/integ-test/src/test/resources/expectedOutput/ppl/cardinality_agg_low.yaml 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_low.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/cardinality_agg_low.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_low.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/composite_date_histogram_daily.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/composite_terms.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/composite_terms_keyword.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/date_histogram_hourly_agg.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/date_histogram_minute_agg.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/default.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/default.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/desc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/desc_sort_timestamp.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/desc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/desc_sort_timestamp_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/desc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/desc_sort_timestamp_no_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/desc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/desc_sort_with_after_timestamp.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/keyword_in_range.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/keyword_terms.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/keyword_terms.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/keyword_terms_low_cardinality.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms_low_cardinality.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/keyword_terms_low_cardinality.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms_low_cardinality.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/multi_terms_keyword.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/multi_terms_keyword.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/multi_terms_keyword.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/query_string_on_message.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/query_string_on_message.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/query_string_on_message_filtered.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/query_string_on_message_filtered.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/query_string_on_message_filtered_sorted_num.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/query_string_on_message_filtered_sorted_num.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_agg_1.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_1.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_agg_1.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_1.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_agg_2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_2.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_agg_2.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_2.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_auto_date_histo.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_auto_date_histo.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_auto_date_histo_with_metrics.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo_with_metrics.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_auto_date_histo_with_metrics.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo_with_metrics.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_field_conjunction_big_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_field_conjunction_big_range_big_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_field_conjunction_small_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_field_conjunction_small_range_big_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_field_conjunction_small_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_field_conjunction_small_range_small_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_field_disjunction_big_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_field_disjunction_big_range_small_term_query.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_numeric.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_numeric.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_with_asc_sort.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_with_asc_sort.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/range_with_desc_sort.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/range_with_desc_sort.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/scroll.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/scroll.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/sort_keyword_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/sort_keyword_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/sort_keyword_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/sort_keyword_no_can_match_shortcut.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_asc.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_asc.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_asc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_asc_with_match.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_desc.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_desc.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_desc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/sort_numeric_desc_with_match.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/term.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/term.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/terms_significant_1.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_1.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/terms_significant_1.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_1.yaml
diff --git a/integ-test/src/test/resources/expectedOutput/ppl/terms_significant_2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_2.yaml
similarity index 100%
rename from integ-test/src/test/resources/expectedOutput/ppl/terms_significant_2.yaml
rename to integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_2.yaml
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java
index d26027cf471..5d96caff9f9 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java
@@ -111,11 +111,12 @@ public void rePushDownSortAggMeasure(
       List<RelFieldCollation> collations,
       List<String> fieldNames) {
     if (builderAndParser.getLeft().isEmpty()) return;
     if (builderAndParser.getLeft().getFirst() instanceof CompositeAggregationBuilder composite) {
+      boolean asc = collations.get(0).getDirection() == RelFieldCollation.Direction.ASCENDING;
       String path = getAggregationPath(collations, fieldNames, composite);
       BucketOrder bucketOrder =
-          collations.get(0).getDirection() == RelFieldCollation.Direction.ASCENDING
-              ? BucketOrder.aggregation(path, true)
-              : BucketOrder.aggregation(path, false);
+          composite.getSubAggregations().isEmpty()
+              ? BucketOrder.count(asc)
+              : BucketOrder.aggregation(path, asc);
       AggregationBuilder aggregationBuilder = null;
       if (composite.sources().size() == 1) {
         if (composite.sources().get(0) instanceof TermsValuesSourceBuilder terms
@@ -310,17 +311,15 @@ private AggregationBuilder attachSubAggregations(
       Collection<AggregationBuilder> subAggregations,
       String path,
       AggregationBuilder aggregationBuilder) {
-    AggregatorFactories.Builder metricBuilder = new AggregatorFactories.Builder();
-    if (subAggregations.isEmpty()) {
-      metricBuilder.addAggregator(AggregationBuilders.count(path).field("_index"));
-    } else {
+    if (!subAggregations.isEmpty()) {
+      AggregatorFactories.Builder metricBuilder = new AggregatorFactories.Builder();
       subAggregations.forEach(metricBuilder::addAggregator);
       // the count aggregator may be eliminated by doc_count optimization, add it back
       if (subAggregations.stream().noneMatch(sub -> sub.getName().equals(path))) {
         metricBuilder.addAggregator(AggregationBuilders.count(path).field("_index"));
       }
+      aggregationBuilder.subAggregations(metricBuilder);
     }
-    aggregationBuilder.subAggregations(metricBuilder);
    return aggregationBuilder;
  }
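The AggPushDownAction change above is why every expected plan in this patch loses its {"value_count":{"field":"_index"}} sub-aggregation and its order key flips to {"_count":"desc"}: when a terms or multi_terms bucket carries no metric sub-aggregations, sorting on a synthetic value_count of _index is equivalent to sorting on the bucket's own document count, which OpenSearch tracks anyway. A minimal sketch of the two BucketOrder styles; the index and field names here ("state", "balance") are illustrative assumptions, not code from this patch:

    import org.opensearch.search.aggregations.AggregationBuilders;
    import org.opensearch.search.aggregations.BucketOrder;
    import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;

    public class BucketOrderSketch {
      // No metrics requested: order buckets by their own doc count. This
      // serializes as {"_count": "desc"} and ships no extra metric aggregator.
      static TermsAggregationBuilder byDocCount() {
        return AggregationBuilders.terms("state")
            .field("state.keyword")
            .order(BucketOrder.count(false));
      }

      // A real metric is requested: keep the sub-aggregation and order on its
      // path, e.g. {"avg_balance": "desc"}.
      static TermsAggregationBuilder byMetric() {
        return AggregationBuilders.terms("state")
            .field("state.keyword")
            .subAggregation(AggregationBuilders.avg("avg_balance").field("balance"))
            .order(BucketOrder.aggregation("avg_balance", false));
      }
    }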
From b366994318ae7045798c11a4c6d5fec6dc478541 Mon Sep 17 00:00:00 2001
From: Songkan Tang
Date: Thu, 27 Nov 2025 13:33:23 +0800
Subject: [PATCH 83/99] Fix wrong parameter and return result logic for
 LogPatternAggFunction (#4868)

Signed-off-by: Songkan Tang
---
 .../udf/udaf/LogPatternAggFunction.java       |  8 ++-
 .../function/PatternParserFunctionImpl.java   |  2 +-
 .../calcite/remote/CalcitePPLPatternsIT.java  | 23 +++----
 .../rest-api-spec/test/issues/4866.yml        | 65 +++++++++++++++++++
 .../opensearch/sql/ppl/parser/AstBuilder.java |  3 +-
 .../ppl/calcite/CalcitePPLPatternsTest.java   | 33 ++++++++++
 6 files changed, 118 insertions(+), 16 deletions(-)
 create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml

diff --git a/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java
index b60d53f145f..f93a0e7c49d 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java
@@ -36,7 +36,7 @@ public LogParserAccumulator init() {
 
   @Override
   public Object result(LogParserAccumulator acc) {
-    if (acc.size() == 0) {
+    if (acc.size() == 0 && acc.logSize() == 0) {
       return null;
     }
 
@@ -89,7 +89,7 @@ public LogParserAccumulator add(
     this.variableCountThreshold = variableCountThreshold;
     this.thresholdPercentage = thresholdPercentage;
     acc.evaluate(field);
-    if (bufferLimit > 0 && acc.size() == bufferLimit) {
+    if (bufferLimit > 0 && acc.logSize() == bufferLimit) {
       acc.partialMerge(
           maxSampleCount, variableCountThreshold, thresholdPercentage, showNumberedToken);
       acc.clearBuffer();
@@ -152,6 +152,10 @@ public static class LogParserAccumulator implements Accumulator {
     public Map<String, List<String>> patternGroupMap = new HashMap<>();
 
     public int size() {
+      return patternGroupMap.size();
+    }
+
+    public int logSize() {
       return logMessages.size();
     }
 
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PatternParserFunctionImpl.java b/core/src/main/java/org/opensearch/sql/expression/function/PatternParserFunctionImpl.java
index 878186cd3b4..e4f7f1f9d1c 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PatternParserFunctionImpl.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PatternParserFunctionImpl.java
@@ -105,7 +105,7 @@ public static Object evalAgg(
       @Parameter(name = "field") String field,
       @Parameter(name = "aggObject") Object aggObject,
       @Parameter(name = "showNumberedToken") Boolean showNumberedToken) {
-    if (Strings.isBlank(field)) {
+    if (Strings.isBlank(field) || aggObject == null) {
      return EMPTY_RESULT;
    }
    List<Map<String, Object>> aggResult = (List<Map<String, Object>>) aggObject;
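The two fixes above address separate failure modes: add() used to gate the buffer flush on the number of merged pattern groups (size()) instead of the number of buffered raw logs, and result() returned null whenever the buffer count was zero, even though earlier flushes had already merged groups into patternGroupMap. A compilable sketch of the corrected accumulator discipline; the internals (digit masking as the merge step, field types) are simplified assumptions, not the real Brain implementation:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class BufferedPatternAccumulator {
      private final List<String> logMessages = new ArrayList<>();                // raw buffered logs
      private final Map<String, List<String>> patternGroupMap = new HashMap<>(); // merged groups
      private final int bufferLimit;

      public BufferedPatternAccumulator(int bufferLimit) {
        this.bufferLimit = bufferLimit;
      }

      public void add(String log) {
        logMessages.add(log);
        // Gate the flush on the number of buffered logs (the diff's logSize()),
        // not on the number of merged pattern groups (size()).
        if (bufferLimit > 0 && logMessages.size() == bufferLimit) {
          partialMerge();
        }
      }

      public Map<String, List<String>> result() {
        // After a flush the log buffer is empty but merged groups remain, so
        // "no data" means both collections are empty -- the result() fix above.
        if (logMessages.isEmpty() && patternGroupMap.isEmpty()) {
          return null;
        }
        partialMerge(); // fold any logs still sitting in the buffer
        return patternGroupMap;
      }

      private void partialMerge() {
        // Stand-in for the Brain merge step: mask digits to form a crude pattern key.
        for (String log : logMessages) {
          patternGroupMap
              .computeIfAbsent(log.replaceAll("\\d+", "<*>"), k -> new ArrayList<>())
              .add(log);
        }
        logMessages.clear();
      }
    }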
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java
index cefb46e4b53..46df914e611 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java
@@ -230,9 +230,8 @@ public void testBrainLabelMode_NotShowNumberedToken() throws IOException {
             "BLOCK* NameSystem.allocateBlock:"
                 + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000296_0/part-00296."
                 + " blk_-6620182933895093708",
-            "BLOCK* NameSystem.allocateBlock:"
-                + " /user/root/sortrand/_temporary/_task_<*>_<*>_r_<*>_<*>/part<*>"
-                + " blk_<*>"));
+            "<*> NameSystem.allocateBlock:"
+                + " /user/root/sortrand/_temporary/_task_<*>_<*>_r_<*>_<*>/part<*> blk_<*>"));
   }
 
@@ -268,21 +267,23 @@ public void testBrainLabelMode_ShowNumberedToken() throws IOException {
             "BLOCK* NameSystem.allocateBlock:"
                 + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000296_0/part-00296."
                 + " blk_-6620182933895093708",
-            "BLOCK* NameSystem.allocateBlock:"
-                + " /user/root/sortrand/_temporary/_task_<token1>_<token2>_r_<token3>_<token4>/part<token5>"
-                + " blk_<token6>",
+            "<token1> NameSystem.allocateBlock:"
+                + " /user/root/sortrand/_temporary/_task_<token2>_<token3>_r_<token4>_<token5>/part<token6>"
+                + " blk_<token7>",
             ImmutableMap.of(
                 "<token1>",
-                ImmutableList.of("200811092030"),
+                ImmutableList.of("BLOCK*"),
                 "<token2>",
-                ImmutableList.of("0002"),
+                ImmutableList.of("200811092030"),
                 "<token3>",
-                ImmutableList.of("000296"),
+                ImmutableList.of("0002"),
                 "<token4>",
-                ImmutableList.of("0"),
+                ImmutableList.of("000296"),
                 "<token5>",
-                ImmutableList.of("-00296."),
+                ImmutableList.of("0"),
                 "<token6>",
+                ImmutableList.of("-00296."),
+                "<token7>",
                 ImmutableList.of("-6620182933895093708"))));
   }
 
diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml
new file mode 100644
index 00000000000..e2ae4c86803
--- /dev/null
+++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml
@@ -0,0 +1,65 @@
+setup:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled: true
+  - do:
+      bulk:
+        index: hdfs_logs
+        refresh: true
+        body:
+          - '{ "index": { "_id": 1 } }'
+          - '{ "date": "20081109", "time": "203615", "pid": 148, "level": "INFO", "component": "dfs.FSNamesystem", "content": "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to blk_-7017553867379051457 size 67108864" }'
+          - '{ "index": { "_id": 2 } }'
+          - '{ "date": "20081109", "time": "204132", "pid": 26, "level": "INFO", "component": "dfs.FSNamesystem", "content": "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added to blk_-3249711809227781266 size 67108864" }'
+          - '{ "index": { "_id": 3 } }'
+          - '{ "date": "20081109", "time": "204925", "pid": 663, "level": "WARN", "component": "dfs.DataNode$PacketResponder", "content": "PacketResponder failed for blk_6996194389878584395" }'
+          - '{ "index": { "_id": 4 } }'
+          - '{ "date": "20081109", "time": "205035", "pid": 31, "level": "WARN", "component": "dfs.DataNode$PacketResponder", "content": "PacketResponder failed for blk_-1547954353065580372" }'
+
+
+---
+teardown:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled : false
+
+
+---
+"Patterns with specified max_sample_count should return correct result":
+  - skip:
+      features:
+        - headers
+        - allowed_warnings
+  - do:
+      allowed_warnings:
+        - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: 'source=hdfs_logs | patterns content method=brain mode=aggregation max_sample_count=2 variable_count_threshold=3'
+  - match: {"total": 2}
+  - match: {"schema": [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}]}
+  - match: {"datarows": [
+      [
+        "PacketResponder failed for blk_<*>",
+        2,
+        [
+          "PacketResponder failed for blk_6996194389878584395",
+          "PacketResponder failed for blk_-1547954353065580372"
+        ]
+      ],
+      [
+        "BLOCK* NameSystem.addStoredBlock: blockMap updated: <*IP*> is added to blk_<*> size <*>",
+        2,
+        [
+          "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to blk_-7017553867379051457 size 67108864",
+          "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added to blk_-3249711809227781266 size 67108864"
+        ]
+      ]
+    ]}
+
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
index ad1cc6926cf..69aca5e42ae 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
@@ -947,8 +947,7 @@ public UnresolvedPlan visitPatternsCommand(OpenSearchPPLParser.PatternsCommandCo
             AstDSL.intLiteral(settings.getSettingValue(Key.PATTERN_MAX_SAMPLE_COUNT)));
     Literal patternBufferLimit =
         cmdOptions.getOrDefault(
-            "max_sample_count",
-            AstDSL.intLiteral(settings.getSettingValue(Key.PATTERN_BUFFER_LIMIT)));
+            "buffer_limit", AstDSL.intLiteral(settings.getSettingValue(Key.PATTERN_BUFFER_LIMIT)));
     Literal showNumberedToken =
         cmdOptions.getOrDefault(
             "show_numbered_token",
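The AstBuilder fix above is a wrong-key lookup: both options were resolved from the command-option map under "max_sample_count", so a user-supplied buffer_limit was silently discarded and max_sample_count doubled as the buffer limit. The shape of the bug, reduced to plain maps with hypothetical defaults (not the plugin's real settings API):

    import java.util.Map;

    public class OptionLookupSketch {
      public static void main(String[] args) {
        Map<String, Integer> cmdOptions = Map.of("buffer_limit", 1000);

        // Buggy: wrong key, so buffer_limit=1000 from the query never takes effect.
        int buggy = cmdOptions.getOrDefault("max_sample_count", 100_000);

        // Fixed: each option is looked up under its own name.
        int fixed = cmdOptions.getOrDefault("buffer_limit", 100_000);

        System.out.println(buggy); // 100000 (the default), despite the user setting 1000
        System.out.println(fixed); // 1000
      }
    }

The new CalcitePPLPatternsTest below pins this down by asserting that a query-level buffer_limit=1000 actually reaches the pattern aggregate call in the logical plan.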
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java
index 3d3bb5b6a1b..c272453b829 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java
@@ -326,6 +326,39 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa
     verifyPPLToSparkSQL(root, expectedSparkSql);
   }
 
+  @Test
+  public void testPatternsAggregationMode_SpecifyAllParameters_ForBrainMethod() {
+    String ppl =
+        "source=EMP | patterns ENAME method=BRAIN mode=aggregation max_sample_count=2"
+            + " buffer_limit=1000 show_numbered_token=false variable_count_threshold=3"
+            + " frequency_threshold_percentage=0.1";
+    RelNode root = getRelNode(ppl);
+
+    String expectedLogical =
+        "LogicalProject(patterns_field=[SAFE_CAST(ITEM($1, 'pattern'))],"
+            + " pattern_count=[SAFE_CAST(ITEM($1, 'pattern_count'))],"
+            + " sample_logs=[SAFE_CAST(ITEM($1, 'sample_logs'))])\n"
+            + "  LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n"
+            + "    LogicalAggregate(group=[{}], patterns_field=[pattern($0, $1, $2, $3, $4, $5)])\n"
+            + "      LogicalProject(ENAME=[$1], $f8=[2], $f9=[1000], $f10=[false],"
+            + " $f11=[0.1:DECIMAL(2, 1)], $f12=[3])\n"
+            + "        LogicalTableScan(table=[[scott, EMP]])\n"
+            + "    Uncollect\n"
+            + "      LogicalProject(patterns_field=[$cor0.patterns_field])\n"
+            + "        LogicalValues(tuples=[[{ 0 }]])\n";
+    verifyLogical(root, expectedLogical);
+
+    String expectedSparkSql =
+        "SELECT TRY_CAST(`t20`.`patterns_field`['pattern'] AS STRING) `patterns_field`,"
+            + " TRY_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT) `pattern_count`,"
+            + " TRY_CAST(`t20`.`patterns_field`['sample_logs'] AS ARRAY< STRING >) `sample_logs`\n"
+            + "FROM (SELECT `pattern`(`ENAME`, 2, 1000, FALSE, 0.1, 3) `patterns_field`\n"
+            + "FROM `scott`.`EMP`) `$cor0`,\n"
+            + "LATERAL UNNEST((SELECT `$cor0`.`patterns_field`\n"
+            + "FROM (VALUES (0)) `t` (`ZERO`))) `t20` (`patterns_field`)";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
   @Test
   public void testPatternsAggregationMode_NotShowNumberedToken_ForBrainMethod() {
     String ppl = "source=EMP | patterns ENAME method=BRAIN mode=aggregation";
integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java delete mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index 42a9ddfb98a..62eadd7ef5e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -464,22 +464,22 @@ public void testStatsBySpanNonBucketNullable() throws IOException { @Test public void testStatsByTimeSpan() throws IOException { - String expected = loadExpectedPlan("explain_stats_by_timespan.json"); - assertJsonEqualsIgnoreId( + String expected = loadExpectedPlan("explain_stats_by_timespan.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( String.format("source=%s | stats count() by span(birthdate,1m)", TEST_INDEX_BANK))); - expected = loadExpectedPlan("explain_stats_by_timespan2.json"); - assertJsonEqualsIgnoreId( + expected = loadExpectedPlan("explain_stats_by_timespan2.yaml"); + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( String.format("source=%s | stats count() by span(birthdate,1M)", TEST_INDEX_BANK))); // bucket_nullable doesn't impact by-span-time - assertJsonEqualsIgnoreId( + assertYamlEqualsIgnoreId( expected, - explainQueryToString( + explainQueryYaml( String.format( "source=%s | stats bucket_nullable=false count() by span(birthdate,1M)", TEST_INDEX_BANK))); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java index 5c2e45f1af1..81eaad47298 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java @@ -59,6 +59,8 @@ protected String executeQueryToString(String query) throws IOException { return getResponseBody(response, true); } + /** Deprecated, use {@link #explainQueryYaml(String)} */ + @Deprecated protected String explainQueryToString(String query) throws IOException { return explainQueryToString(query, false); } diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index b7e030e1295..bf9b214bd44 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -411,7 +411,11 @@ public static void assertJsonEquals(String expected, String actual) { JsonParser.parseString(eliminatePid(actual))); } - /** Compare two JSON string are equals with ignoring the RelNode id in the Calcite plan. */ + /** + * Compare two JSON strings for equality, ignoring the RelNode id in the Calcite plan. + * Deprecated, use {@link #assertYamlEqualsIgnoreId(String, String)} instead. + */ + @Deprecated public static void assertJsonEqualsIgnoreId(String expected, String actual) { assertJsonEquals(cleanUpId(expected), cleanUpId(actual)); } @@ -434,6 +438,7 @@ private static String eliminatePid(String s) { return s.replaceAll("pitId=[^,]+,", "pitId=*,"); } + /** Compare two YAML strings for equality, ignoring the RelNode id in the Calcite plan. */ public static void assertYamlEqualsIgnoreId(String expectedYaml, String actualYaml) { String cleanedYaml = cleanUpYaml(actualYaml); assertYamlEquals(expectedYaml, cleanedYaml); diff --git a/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl b/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl index 656289b0603..10679eb2ea6 100644 --- a/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl +++ b/integ-test/src/test/resources/big5/queries/composite_date_histogram_daily.ppl @@ -31,4 +31,5 @@ */ source = big5 | where `@timestamp` >= '2022-12-30 00:00:00' and `@timestamp` < '2023-01-07 12:00:00' -| stats count() by span(`@timestamp`, 1d) \ No newline at end of file +| stats count() by span(`@timestamp`, 1d) +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/composite_terms.ppl b/integ-test/src/test/resources/big5/queries/composite_terms.ppl index 07edca09e69..4a9a4a3244c 100644 --- a/integ-test/src/test/resources/big5/queries/composite_terms.ppl +++ b/integ-test/src/test/resources/big5/queries/composite_terms.ppl @@ -29,4 +29,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats count() by `process.name`, `cloud.region` -| sort - `process.name`, + `cloud.region` \ No newline at end of file +| sort - `process.name`, + `cloud.region` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl b/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl index 42b8c9585a4..e0a92c0b6e6 100644 --- a/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl +++ b/integ-test/src/test/resources/big5/queries/composite_terms_keyword.ppl @@ -30,4 +30,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream` -| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` \ No newline at end of file +| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl b/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl index 97897e227de..6161a31a0c8 100644 --- a/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl +++ b/integ-test/src/test/resources/big5/queries/optimized/composite_terms.ppl @@ -29,4 +29,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats 
bucket_nullable = false count() by `process.name`, `cloud.region` -| sort - `process.name`, + `cloud.region` \ No newline at end of file +| sort - `process.name`, + `cloud.region` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl b/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl index 04d12b4fb0e..224f5575aaa 100644 --- a/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl +++ b/integ-test/src/test/resources/big5/queries/optimized/composite_terms_keyword.ppl @@ -30,4 +30,5 @@ source = big5 | where `@timestamp` >= '2023-01-02 00:00:00' and `@timestamp` < '2023-01-02 10:00:00' | stats bucket_nullable = false count() by `process.name`, `cloud.region`, `aws.cloudwatch.log_stream` -| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` \ No newline at end of file +| sort - `process.name`, + `cloud.region`, + `aws.cloudwatch.log_stream` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index 9b69c67b74c..10023133a38 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -1,11 +1,12 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0]) - LogicalAggregate(group=[{0}], count()=[COUNT()]) - LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')]) - LogicalFilter(condition=[IS NOT NULL($17)]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalSort(fetch=[10]) + LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')]) + LogicalFilter(condition=[IS NOT NULL($17)]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"@timestamp","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], 
PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml index 8720f023f80..cc3af323ddf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml @@ -1,7 +1,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first]) + LogicalSort(sort0=[$1], sort1=[$2], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], fetch=[10]) LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1]) LogicalAggregate(group=[{0, 1}], count()=[COUNT()]) LogicalProject(process.name=[$7], cloud.region=[$14]) @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml index ac251d900f0..9e546a26dbf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml @@ -1,7 +1,7 @@ calcite: logical: | LogicalSystemLimit(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], dir2=[ASC-nulls-first], fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], dir2=[ASC-nulls-first]) + LogicalSort(sort0=[$1], sort1=[$2], sort2=[$3], dir0=[DESC-nulls-last], dir1=[ASC-nulls-first], dir2=[ASC-nulls-first], fetch=[10]) LogicalProject(count()=[$3], process.name=[$0], cloud.region=[$1], aws.cloudwatch.log_stream=[$2]) LogicalAggregate(group=[{0, 1, 2}], count()=[COUNT()]) LogicalProject(process.name=[$7], cloud.region=[$14], aws.cloudwatch.log_stream=[$34]) @@ -9,4 +9,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 
10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml index c715c2c2a42..44b15522967 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"@timestamp","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml index 29ebac7168f..bd8114a7989 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"birthdate","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json deleted file mode 100644 index f84aa0cb018..00000000000 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1m)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml new file mode 100644 index 00000000000..b4384528c0c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1m)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1m)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json deleted file mode 100644 index 036547978b1..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1M)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\":\"1M\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml new file mode 100644 index 00000000000..5021adf62b8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1M)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1M"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json deleted file mode 100644 index 1b846e6c16e..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1m)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1m)=[$t0])\n EnumerableAggregate(group=[{0}], count()=[COUNT()])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['m'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1m)=[$t21], $condition=[$t22])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml new file mode 100644 index 00000000000..3843b2bce4a --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1m)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + 
LogicalProject(span(birthdate,1m)=[SPAN($3, 1, 'm')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1m)=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['m'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1m)=[$t21], $condition=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json deleted file mode 100644 index 9cec9bcf190..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(birthdate,1M)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')])\n LogicalFilter(condition=[IS NOT NULL($3)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1M)=[$t0])\n EnumerableAggregate(group=[{0}], count()=[COUNT()])\n EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['M'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1M)=[$t21], $condition=[$t22])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml new file mode 100644 index 00000000000..af739c44d85 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_stats_by_timespan2.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], span(birthdate,1M)=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(span(birthdate,1M)=[SPAN($3, 1, 'M')]) + LogicalFilter(condition=[IS NOT NULL($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], span(birthdate,1M)=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[1], expr#20=['M'], expr#21=[SPAN($t3, $t19, $t20)], expr#22=[IS NOT NULL($t3)], span(birthdate,1M)=[$t21], $condition=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml index 9a0882dc49a..073078ddf0d 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml @@ -3,17 +3,23 @@ 
root: description: fields: "[count(), span(`@timestamp`,1d)]" children: - - name: OpenSearchIndexScan + - name: LimitOperator description: - request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ - size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\"\ - :{\"@timestamp\":{\"from\":1672358400000,\"to\":null,\"include_lower\":true,\"\ - include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\":{\"from\"\ - :null,\"to\":1673092800000,\"include_lower\":true,\"include_upper\":false,\"\ - boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ - :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1d)\"\ - :{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":false,\"\ - order\":\"asc\",\"fixed_interval\":\"1d\"}}}]},\"aggregations\":{\"count()\"\ - :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + limit: 10 + offset: 0 + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\"\ + :0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"\ + range\":{\"@timestamp\":{\"from\":1672358400000,\"to\":null,\"include_lower\"\ + :true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\"\ + :{\"from\":null,\"to\":1673092800000,\"include_lower\":true,\"include_upper\"\ + :false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"\ + aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"\ + sources\":[{\"span(`@timestamp`,1d)\":{\"date_histogram\":{\"field\"\ + :\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\"\ + :\"1d\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\"\ + :\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml index 481d9cdd423..6a0e0c660da 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml @@ -3,19 +3,25 @@ root: description: fields: "[count(), process.name, cloud.region]" children: - - name: OpenSearchIndexScan + - name: LimitOperator description: - request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ - size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\"\ - :{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\":true,\"\ - include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\":{\"from\"\ - :null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\":false,\"\ - boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ - :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"process.name\"\ - :{\"terms\":{\"field\":\"process.name\",\"missing_bucket\":true,\"missing_order\"\ - :\"last\",\"order\":\"desc\"}}},{\"cloud.region\":{\"terms\":{\"field\"\ - :\"cloud.region\",\"missing_bucket\":true,\"missing_order\":\"first\",\"\ - order\":\"asc\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"\ - field\":\"_index\"}}}}}}, needClean=true, 
searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + limit: 10 + offset: 0 + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\"\ + :0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"\ + range\":{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\"\ + :true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\"\ + :{\"from\":null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\"\ + :false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"\ + aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"\ + sources\":[{\"process.name\":{\"terms\":{\"field\":\"process.name\"\ + ,\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"desc\"\ + }}},{\"cloud.region\":{\"terms\":{\"field\":\"cloud.region\",\"missing_bucket\"\ + :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ + :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ + \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml index a7f12407647..5c77f33d0cd 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml @@ -3,21 +3,27 @@ root: description: fields: "[count(), process.name, cloud.region, aws.cloudwatch.log_stream]" children: - - name: OpenSearchIndexScan + - name: LimitOperator description: - request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ - size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\"\ - :{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\":true,\"\ - include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\":{\"from\"\ - :null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\":false,\"\ - boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"aggregations\"\ - :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"process.name\"\ - :{\"terms\":{\"field\":\"process.name\",\"missing_bucket\":true,\"missing_order\"\ - :\"last\",\"order\":\"desc\"}}},{\"cloud.region\":{\"terms\":{\"field\"\ - :\"cloud.region\",\"missing_bucket\":true,\"missing_order\":\"first\",\"\ - order\":\"asc\"}}},{\"aws.cloudwatch.log_stream\":{\"terms\":{\"field\"\ - :\"aws.cloudwatch.log_stream\",\"missing_bucket\":true,\"missing_order\"\ - :\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"count()\":{\"value_count\"\ - :{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - children: [] \ No newline at end of file + limit: 10 + offset: 0 + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\"\ + :0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"\ + range\":{\"@timestamp\":{\"from\":1672617600000,\"to\":null,\"include_lower\"\ + :true,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"@timestamp\"\ + 
:{\"from\":null,\"to\":1672653600000,\"include_lower\":true,\"include_upper\"\ + :false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"\ + aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"\ + sources\":[{\"process.name\":{\"terms\":{\"field\":\"process.name\"\ + ,\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"desc\"\ + }}},{\"cloud.region\":{\"terms\":{\"field\":\"cloud.region\",\"missing_bucket\"\ + :true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"aws.cloudwatch.log_stream\"\ + :{\"terms\":{\"field\":\"aws.cloudwatch.log_stream\",\"missing_bucket\"\ + :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ + :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ + \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json deleted file mode 100644 index 7d345202ce4..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[count(), span(birthdate,1m)]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml new file mode 100644 index 00000000000..3fd26cb19a6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml @@ -0,0 +1,15 @@ +root: + name: ProjectOperator + description: + fields: "[count(), span(birthdate,1m)]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank,\ + \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ + :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\"\ + :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ + order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ + :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ + \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json deleted file mode 100644 index a5b9e210f09..00000000000 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "root": { - "name": "ProjectOperator", - "description": { - "fields": 
"[count(), span(birthdate,1M)]" - }, - "children": [{ - "name": "OpenSearchIndexScan", - "description": { - "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\":{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"order\":\"asc\",\"calendar_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" - }, - "children": [] - }] - } -} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml new file mode 100644 index 00000000000..167328625f1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml @@ -0,0 +1,15 @@ +root: + name: ProjectOperator + description: + fields: "[count(), span(birthdate,1M)]" + children: + - name: OpenSearchIndexScan + description: + request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_bank,\ + \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ + :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\"\ + :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ + order\":\"asc\",\"calendar_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\"\ + :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ + \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + children: [] \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 43d06c5e6b1..1f28580ed70 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -658,11 +658,10 @@ private QueryExpression binary(RexCall call) { RexUnknownAs nullAs = getNullAsForSearch(call); QueryExpression finalExpression = switch (nullAs) { - // e.g. where isNotNull(a) and (a = 1 or a = 2) - // TODO: For this case, seems return `expression` should be equivalent - case FALSE -> - CompoundQueryExpression.and( - false, expression, QueryExpression.create(pair.getKey()).exists()); + // e.g. where isNotNull(a) and ( a = 1 or a = 2) + // For this case, return `expression` is equivalent + // But DSL `bool.must` could slow down the query, so we return `expression` + case FALSE -> expression; // e.g. 
where isNull(a) or a = 1 or a = 2 case TRUE -> CompoundQueryExpression.or( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java deleted file mode 100644 index 8fa2c916288..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.script.aggregation.dsl; - -import static org.opensearch.sql.data.type.ExprCoreType.DATE; -import static org.opensearch.sql.data.type.ExprCoreType.TIME; -import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; -import static org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE; - -import java.util.List; -import java.util.stream.Collectors; -import org.opensearch.search.aggregations.BucketOrder; -import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder; -import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; -import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; -import org.opensearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; -import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; -import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; -import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; -import org.opensearch.search.aggregations.support.ValueType; -import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; -import org.opensearch.sql.ast.expression.SpanUnit; -import org.opensearch.sql.expression.NamedExpression; -import org.opensearch.sql.expression.span.SpanExpression; -import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; -import org.opensearch.sql.opensearch.storage.serde.ExpressionSerializer; - -/** Bucket Aggregation Builder. */ -public class BucketAggregationBuilder { - - private final AggregationBuilderHelper helper; - - public BucketAggregationBuilder(ExpressionSerializer serializer) { - this.helper = new AggregationBuilderHelper(serializer); - } - - /** Build the ValuesSourceAggregationBuilder. 
*/ - public ValuesSourceAggregationBuilder build(NamedExpression expr) { - if (expr.getDelegated() instanceof SpanExpression) { - SpanExpression spanExpr = (SpanExpression) expr.getDelegated(); - return buildHistogram( - expr.getName(), - spanExpr.getField().toString(), - spanExpr.getValue().valueOf().doubleValue(), - spanExpr.getUnit()); - } else { - TermsAggregationBuilder sourceBuilder = new TermsAggregationBuilder(expr.getName()); - sourceBuilder.size(AGGREGATION_BUCKET_SIZE); - sourceBuilder.order(BucketOrder.key(true)); - // Time types values are converted to LONG in ExpressionAggregationScript::execute - if ((expr.getDelegated().type() instanceof OpenSearchDateType - && List.of(TIMESTAMP, TIME, DATE) - .contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType())) - || List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { - sourceBuilder.userValueTypeHint(ValueType.LONG); - } - return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script); - } - } - - /** Build the MultiTermsAggregationBuilder. */ - public MultiTermsAggregationBuilder buildMultipleTerms(List exprs) { - MultiTermsAggregationBuilder sourceBuilder = - new MultiTermsAggregationBuilder( - exprs.stream().map(NamedExpression::getName).collect(Collectors.joining("_"))); - sourceBuilder.terms( - exprs.stream() - .map( - expr -> { - MultiTermsValuesSourceConfig.Builder config = - new MultiTermsValuesSourceConfig.Builder(); - config.setFieldName(expr.getName()); - // Time types values are converted to LONG in ExpressionAggregationScript::execute - if ((expr.getDelegated().type() instanceof OpenSearchDateType - && List.of(TIMESTAMP, TIME, DATE) - .contains( - ((OpenSearchDateType) expr.getDelegated().type()) - .getExprCoreType())) - || List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { - config.setUserValueTypeHint(ValueType.LONG); - } - return config.build(); - }) - .toList()); - sourceBuilder.size(AGGREGATION_BUCKET_SIZE); - return sourceBuilder; - } - - public static ValuesSourceAggregationBuilder buildHistogram( - String name, String field, Double value, SpanUnit unit) { - switch (unit) { - case NONE: - return new HistogramAggregationBuilder(name).field(field).interval(value); - case UNKNOWN: - throw new IllegalStateException("Invalid span unit"); - default: - return buildDateHistogram(name, field, value.intValue(), unit); - } - } - - public static ValuesSourceAggregationBuilder buildAutoDateHistogram( - String name, String field, Integer bucketSize) { - return new AutoDateHistogramAggregationBuilder(name).field(field).setNumBuckets(bucketSize); - } - - public static ValuesSourceAggregationBuilder buildDateHistogram( - String name, String field, Integer value, SpanUnit unit) { - String spanValue = value + unit.getName(); - switch (unit) { - case MILLISECOND: - case MS: - case SECOND: - case S: - case MINUTE: - case m: - case HOUR: - case H: - case DAY: - case D: - return new DateHistogramAggregationBuilder(name) - .field(field) - .fixedInterval(new DateHistogramInterval(spanValue)); - default: - return new DateHistogramAggregationBuilder(name) - .field(field) - .calendarInterval(new DateHistogramInterval(spanValue)); - } - } -} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java deleted file mode 100644 index 26b303fbf45..00000000000 --- 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.storage.script.aggregation.dsl; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.Mockito.when; -import static org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; -import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; -import static org.opensearch.sql.data.type.ExprCoreType.STRING; -import static org.opensearch.sql.expression.DSL.literal; -import static org.opensearch.sql.expression.DSL.named; -import static org.opensearch.sql.expression.DSL.ref; - -import java.util.Map; -import lombok.SneakyThrows; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DisplayNameGeneration; -import org.junit.jupiter.api.DisplayNameGenerator; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.EnumSource; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.core.common.bytes.BytesReference; -import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; -import org.opensearch.sql.data.type.ExprCoreType; -import org.opensearch.sql.data.type.ExprType; -import org.opensearch.sql.expression.DSL; -import org.opensearch.sql.expression.NamedExpression; -import org.opensearch.sql.expression.parse.ParseExpression; -import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; -import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; -import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; -import org.opensearch.sql.opensearch.storage.serde.ExpressionSerializer; - -@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) -@ExtendWith(MockitoExtension.class) -class BucketAggregationBuilderTest { - - @Mock private ExpressionSerializer serializer; - - private BucketAggregationBuilder aggregationBuilder; - - @BeforeEach - void set_up() { - aggregationBuilder = new BucketAggregationBuilder(serializer); - } - - @Test - void should_build_bucket_with_field() { - assertEquals( - "{\n" - + " \"age\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"age\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("age", ref("age", INTEGER)))); - } - - @Test - void should_build_bucket_with_literal() { - var literal = literal(1); - when(serializer.serialize(literal)).thenReturn("mock-serialize"); - assertEquals( - "{\n" - + " \"1\" : {\n" - + " \"terms\" : {\n" - + " \"script\" : {\n" - + " \"source\" :" - + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" - + " \"lang\" : \"opensearch_compounded_script\"\n" - + " },\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named(literal))); - } - - @Test - void 
should_build_bucket_with_keyword_field() { - assertEquals( - "{\n" - + " \"name\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"name.keyword\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery( - named( - "name", - ref( - "name", - OpenSearchTextType.of( - Map.of( - "words", - OpenSearchDataType.of(OpenSearchDataType.MappingType.Keyword))))))); - } - - @Test - void should_build_bucket_with_parse_expression() { - ParseExpression parseExpression = - DSL.regex(ref("name.keyword", STRING), DSL.literal("(?\\w+)"), DSL.literal("name")); - when(serializer.serialize(parseExpression)).thenReturn("mock-serialize"); - assertEquals( - "{\n" - + " \"name\" : {\n" - + " \"terms\" : {\n" - + " \"script\" : {\n" - + " \"source\" :" - + " \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"mock-serialize\\\"}\",\n" - + " \"lang\" : \"opensearch_compounded_script\"\n" - + " },\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("name", parseExpression))); - } - - @Test - void terms_bucket_for_opensearchdate_type_uses_long() { - OpenSearchDateType dataType = OpenSearchDateType.of(ExprCoreType.TIMESTAMP); - - assertEquals( - "{\n" - + " \"date\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"value_type\" : \"long\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("date", ref("date", dataType)))); - } - - @Test - void terms_bucket_for_opensearchdate_type_uses_long_false() { - OpenSearchDateType dataType = OpenSearchDateType.of(STRING); - - assertEquals( - "{\n" - + " \"date\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("date", ref("date", dataType)))); - } - - @ParameterizedTest(name = "{0}") - @EnumSource( - value = ExprCoreType.class, - names = {"TIMESTAMP", "TIME", "DATE"}) - void terms_bucket_for_datetime_types_uses_long(ExprType dataType) { - assertEquals( - "{\n" - + " \"date\" : {\n" - + " \"terms\" : {\n" - + " \"field\" : \"date\",\n" - + " \"value_type\" : \"long\",\n" - + " \"size\" : 1000,\n" - + " \"min_doc_count\" : 1,\n" - + " \"shard_min_doc_count\" : 0,\n" - + " \"show_term_doc_count_error\" : false,\n" - + " \"order\" : {\n" - + " \"_key\" : \"asc\"\n" - + " }\n" - + " }\n" - + " }\n" - + "}", - buildQuery(named("date", ref("date", dataType)))); - } - - @SneakyThrows - private String buildQuery(NamedExpression groupByExpression) { - XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); - builder.startObject(); - ValuesSourceAggregationBuilder sourceBuilder = aggregationBuilder.build(groupByExpression); - sourceBuilder.toXContent(builder, EMPTY_PARAMS); - builder.endObject(); - return BytesReference.bytes(builder).utf8ToString(); - } -} From 
    aa4bfa824725ef09dbd75e97a05b08c69fad180f Mon Sep 17 00:00:00 2001 From: qianheng  Date: Fri, 28 Nov 2025 16:32:10 +0800 Subject: [PATCH 85/99] Refactor alias type field by adding another project with alias (#4881) Signed-off-by: Heng Qian  --- .../sql/calcite/CalciteRelNodeVisitor.java | 7 +++- .../calcite/plan/AliasFieldsWrappable.java | 36 +++++++++++++++++++ .../calcite/utils/OpenSearchTypeFactory.java | 2 ++ .../sql/calcite/remote/CalciteExplainIT.java | 13 +++++++ .../calcite/explain_alias_type_field.yaml | 9 +++++ .../explain_alias_type_field.yaml | 13 +++++++ .../scan/AbstractCalciteIndexScan.java | 9 ++++- .../scan/CalciteEnumerableIndexScan.java | 8 +---- .../storage/scan/CalciteLogicalIndexScan.java | 9 ++--- 9 files changed, 90 insertions(+), 16 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/AliasFieldsWrappable.java create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_alias_type_field.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_alias_type_field.yaml diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 6a556eccc92..8049afce825 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -143,6 +143,7 @@ import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; +import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; import org.opensearch.sql.calcite.plan.LogicalSystemLimit; import org.opensearch.sql.calcite.plan.LogicalSystemLimit.SystemLimitType; import org.opensearch.sql.calcite.plan.OpenSearchConstants; @@ -196,7 +197,11 @@ public RelNode visitRelation(Relation node, CalcitePlanContext context) { throw new CalciteUnsupportedException("information_schema is unsupported in Calcite"); } context.relBuilder.scan(node.getTableQualifiedName().getParts()); - return context.relBuilder.peek(); + RelNode scan = context.relBuilder.peek(); + if (scan instanceof AliasFieldsWrappable) { + return ((AliasFieldsWrappable) scan).wrapProjectForAliasFields(context.relBuilder); + } + return scan; } // This is a tool method to add an existing RelOptTable to builder stack, not used for now
    
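    The new interface introduced below wraps the scan in one extra Project. As a rough sketch of that step for a single alias (an editorial illustration, not part of the patch; wrapSingleAlias is a hypothetical helper, and alias_col/original_col mirror the test index used later in this patch), the RelBuilder call amounts to::
    
      import org.apache.calcite.rel.RelNode;
      import org.apache.calcite.rex.RexNode;
      import org.apache.calcite.tools.RelBuilder;
    
      // With the TableScan on top of the builder stack, project the original column
      // once more under its alias name, so the alias stays resolvable even though
      // the scan's schema no longer carries it.
      static RelNode wrapSingleAlias(RelBuilder relBuilder, String alias, String originalField) {
        RexNode aliased = relBuilder.alias(relBuilder.field(originalField), alias);
        return relBuilder.projectPlus(aliased).peek();
      }
    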
    diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/AliasFieldsWrappable.java b/core/src/main/java/org/opensearch/sql/calcite/plan/AliasFieldsWrappable.java new file mode 100644 index 00000000000..1a4080ead47 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/AliasFieldsWrappable.java @@ -0,0 +1,36 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.plan; + +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.tools.RelBuilder; + +/** + * Wrapper for TableScan to add alias fields by creating another Project with aliases on top of + * it. This allows the TableScan or Table to omit alias type fields from its schema, while still + * supporting resolution of these fields when they are used in the query. + */ +public interface AliasFieldsWrappable { + + Map<String, String> getAliasMapping(); + + default RelNode wrapProjectForAliasFields(RelBuilder relBuilder) { + assert relBuilder.peek() instanceof AliasFieldsWrappable + : "The top node in RelBuilder must be AliasFieldsWrappable"; + Set<Entry<String, String>> aliasFieldsSet = this.getAliasMapping().entrySet(); + // Adding alias referring to the original field. + List<RexNode> aliasFieldsNew = + aliasFieldsSet.stream() + .map(entry -> relBuilder.alias(relBuilder.field(entry.getValue()), entry.getKey())) + .toList(); + return relBuilder.projectPlus(aliasFieldsNew).peek(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index c505a431c27..17d99fb4fbb 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -313,6 +313,8 @@ public static RelDataType convertSchema(Table table) { Map fieldTypes = new LinkedHashMap<>(table.getFieldTypes()); fieldTypes.putAll(table.getReservedFieldTypes()); for (Entry entry : fieldTypes.entrySet()) { + // skip alias type fields when constructing schema + if (entry.getValue().getOriginalPath().isPresent()) continue; fieldNameList.add(entry.getKey()); typeList.add(OpenSearchTypeFactory.convertExprTypeToRelDataType(entry.getValue())); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index d0cac82b23f..84591568b07 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ALIAS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; @@ -42,6 +43,7 @@ public void init() throws Exception { loadIndex(Index.WORKER); loadIndex(Index.WORK_INFORMATION); loadIndex(Index.WEBLOG); + loadIndex(Index.DATA_TYPE_ALIAS); } @Override @@ -1953,4 +1955,15 @@ public void testDedupTextTypeNotPushdown() throws IOException { assertYamlEqualsIgnoreId( expected, explainQueryYaml(String.format("source=%s | dedup email", TEST_INDEX_BANK))); } + + @Test + public void testAliasTypeField() throws IOException { + String expected = loadExpectedPlan("explain_alias_type_field.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | fields alias_col | where alias_col > 10 | stats avg(alias_col)", + TEST_INDEX_ALIAS))); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_alias_type_field.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_alias_type_field.yaml new file mode 100644 index 00000000000..9465b18b2b1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_alias_type_field.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{}], avg(alias_col)=[AVG($0)]) + LogicalFilter(condition=[>($0, 10)]) + LogicalProject(alias_col=[$0]) + 
    
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_alias]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_alias]], PushDownContext=[[FILTER->>($0, 10), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},avg(alias_col)=AVG($0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"original_col":{"from":10,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"aggregations":{"avg(alias_col)":{"avg":{"field":"original_col"}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_alias_type_field.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_alias_type_field.yaml new file mode 100644 index 00000000000..a065c90865d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_alias_type_field.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{}], avg(alias_col)=[AVG($0)]) + LogicalFilter(condition=[>($0, 10)]) + LogicalProject(alias_col=[$0]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_alias]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[0], expr#3=[=($t1, $t2)], expr#4=[null:BIGINT], expr#5=[CASE($t3, $t4, $t0)], expr#6=[CAST($t5):DOUBLE], expr#7=[/($t6, $t1)], avg(alias_col)=[$t7]) + EnumerableAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)]) + EnumerableCalc(expr#0..6=[{inputs}], expr#7=[10], expr#8=[>($t0, $t7)], proj#0..6=[{exprs}], $condition=[$t8]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_alias]]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 29b240613b5..80ad06422e2 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -11,6 +11,7 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.stream.Stream; import lombok.Getter; import org.apache.calcite.adapter.enumerable.EnumerableMergeJoin; @@ -43,6 +44,7 @@ import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; @@ -61,7 +63,7 @@ /** An abstract relational operator representing a scan of an OpenSearchIndex type. 
*/ @Getter -public abstract class AbstractCalciteIndexScan extends TableScan { +public abstract class AbstractCalciteIndexScan extends TableScan implements AliasFieldsWrappable { private static final Logger LOG = LogManager.getLogger(AbstractCalciteIndexScan.class); public final OpenSearchIndex osIndex; // The schema of this scan operator, it's initialized with the row type of the table, but may be @@ -252,6 +254,11 @@ protected abstract AbstractCalciteIndexScan buildScan( RelDataType schema, PushDownContext pushDownContext); + @Override + public Map getAliasMapping() { + return osIndex.getAliasMapping(); + } + protected List getCollationNames(List collations) { return collations.stream() .map(collation -> getRowType().getFieldNames().get(collation.getFieldIndex())) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index bc5a289f465..ddf14cfee38 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -115,7 +115,7 @@ public Enumerator enumerator() { OpenSearchRequestBuilder requestBuilder = pushDownContext.createRequestBuilder(); return new OpenSearchIndexEnumerator( osIndex.getClient(), - getFieldPath(), + getRowType().getFieldNames(), requestBuilder.getMaxResponseSize(), requestBuilder.getMaxResultWindow(), osIndex.buildRequest(requestBuilder), @@ -123,10 +123,4 @@ public Enumerator enumerator() { } }; } - - private List getFieldPath() { - return getRowType().getFieldNames().stream() - .map(f -> osIndex.getAliasMapping().getOrDefault(f, f)) - .toList(); - } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 2821aa037da..4aaac41cc75 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -272,15 +272,10 @@ public CalciteLogicalIndexScan pushDownProject(List selectedColumns) { // For aggregate, we do nothing on query builder but only change the schema of the scan. action = (AggregationBuilderAction) aggAction -> {}; } else { - Map aliasMapping = this.osIndex.getAliasMapping(); - // For alias types, we need to push down its original path instead of the alias name. 
- List projectedFields = - newSchema.getFieldNames().stream() - .map(fieldName -> aliasMapping.getOrDefault(fieldName, fieldName)) - .toList(); action = (OSRequestBuilderAction) - requestBuilder -> requestBuilder.pushDownProjectStream(projectedFields.stream()); + requestBuilder -> + requestBuilder.pushDownProjectStream(newSchema.getFieldNames().stream()); } newScan.pushDownContext.add(PushDownType.PROJECT, newSchema.getFieldNames(), action); return newScan; From 8e24881b2eeff830c8e5e803ca3ef8fa9e5fea5f Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Mon, 1 Dec 2025 10:46:01 -0800 Subject: [PATCH 86/99] Implement one-batch lookahead for index enumerators (#4345) --- .../job/ScheduledAsyncQueryJobRunner.java | 5 +- .../job/ScheduledAsyncQueryJobRunnerTest.java | 5 +- .../sql/datasources/utils/Scheduler.java | 4 +- docs/user/admin/settings.rst | 21 +++ .../legacy/executor/AsyncRestExecutor.java | 6 +- .../cursor/CursorAsyncRestExecutor.java | 5 +- .../sql/legacy/plugin/RestSqlStatsAction.java | 3 +- .../opensearch/client/OpenSearchClient.java | 3 +- .../client/OpenSearchNodeClient.java | 5 +- .../client/OpenSearchRestClient.java | 5 +- .../executor/OpenSearchExecutionEngine.java | 8 +- .../executor/OpenSearchQueryManager.java | 3 +- .../opensearch/storage/OpenSearchIndex.java | 34 +++- .../storage/scan/BackgroundSearchScanner.java | 177 ++++++++++++++++++ .../scan/OpenSearchIndexEnumerator.java | 55 +++--- .../client/OpenSearchRestClientTest.java | 3 +- .../OpenSearchDefaultImplementorTest.java | 11 +- .../storage/OpenSearchIndexTest.java | 4 + .../scan/BackgroundSearchScannerTest.java | 150 +++++++++++++++ .../org/opensearch/sql/plugin/SQLPlugin.java | 21 ++- 20 files changed, 453 insertions(+), 75 deletions(-) create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java create mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java diff --git a/async-query/src/main/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunner.java b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunner.java index feacb615390..ab39ecd62d0 100644 --- a/async-query/src/main/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunner.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunner.java @@ -12,7 +12,6 @@ import org.opensearch.jobscheduler.spi.ScheduledJobParameter; import org.opensearch.jobscheduler.spi.ScheduledJobRunner; import org.opensearch.plugins.Plugin; -import org.opensearch.sql.legacy.executor.AsyncRestExecutor; import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService; import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; @@ -21,6 +20,8 @@ import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.client.Client; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; + /** * The job runner class for scheduling async query. 
* @@ -37,7 +38,7 @@ public class ScheduledAsyncQueryJobRunner implements ScheduledJobRunner { // Share SQL plugin thread pool private static final String ASYNC_QUERY_THREAD_POOL_NAME = - AsyncRestExecutor.SQL_WORKER_THREAD_POOL_NAME; + SQL_WORKER_THREAD_POOL_NAME; private static final Logger LOGGER = LogManager.getLogger(ScheduledAsyncQueryJobRunner.class); private static final ScheduledAsyncQueryJobRunner INSTANCE = new ScheduledAsyncQueryJobRunner(); diff --git a/async-query/src/test/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunnerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunnerTest.java index 30b242db816..a34ad909365 100644 --- a/async-query/src/test/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunnerTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunnerTest.java @@ -15,6 +15,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; import java.time.Instant; import org.apache.logging.log4j.LogManager; @@ -87,7 +88,7 @@ public void testRunJobWithCorrectParameter() { spyJobRunner.runJob(request, context); ArgumentCaptor captor = ArgumentCaptor.forClass(Runnable.class); - verify(threadPool.executor(AsyncRestExecutor.SQL_WORKER_THREAD_POOL_NAME)) + verify(threadPool.executor(SQL_WORKER_THREAD_POOL_NAME)) .submit(captor.capture()); Runnable runnable = captor.getValue(); @@ -145,7 +146,7 @@ public void testDoRefreshThrowsException() { spyJobRunner.runJob(request, context); ArgumentCaptor captor = ArgumentCaptor.forClass(Runnable.class); - verify(threadPool.executor(AsyncRestExecutor.SQL_WORKER_THREAD_POOL_NAME)) + verify(threadPool.executor(SQL_WORKER_THREAD_POOL_NAME)) .submit(captor.capture()); Runnable runnable = captor.getValue(); diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/utils/Scheduler.java b/datasources/src/main/java/org/opensearch/sql/datasources/utils/Scheduler.java index 1cf54ffd88d..8aa96338948 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/utils/Scheduler.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/utils/Scheduler.java @@ -11,13 +11,11 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.client.node.NodeClient; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; /** The scheduler which schedule the task run in sql-worker thread pool. */ @UtilityClass public class Scheduler { - - public static final String SQL_WORKER_THREAD_POOL_NAME = "sql-worker"; - public static void schedule(NodeClient client, Runnable task) { ThreadPool threadPool = client.threadPool(); threadPool.schedule(withCurrentContext(task), new TimeValue(0), SQL_WORKER_THREAD_POOL_NAME); diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst index cd8ee2458ae..735fc84ba34 100644 --- a/docs/user/admin/settings.rst +++ b/docs/user/admin/settings.rst @@ -253,6 +253,27 @@ Result set:: "transient": {} } +Thread Pool Settings +==================== + +The SQL plugin is integrated with the `OpenSearch Thread Pool Settings `_. 
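    As a sketch of how plugin code is expected to target these pools after this refactor (the helper methods here are illustrative, not plugin API; the string literals correspond to the constants now centralized in OpenSearchQueryManager)::
    
      import org.opensearch.common.unit.TimeValue;
      import org.opensearch.threadpool.ThreadPool;
      import org.opensearch.transport.client.node.NodeClient;
    
      // Query work goes to the shared "sql-worker" pool (SQL_WORKER_THREAD_POOL_NAME)...
      static void scheduleQueryWork(NodeClient client, Runnable task) {
        ThreadPool threadPool = client.threadPool();
        threadPool.schedule(task, new TimeValue(0), "sql-worker");
      }
    
      // ...while background I/O such as search pre-fetch targets "sql_background_io"
      // (SQL_BACKGROUND_THREAD_POOL_NAME), keeping cluster-facing load on its own queue.
      static void submitBackgroundIo(NodeClient client, Runnable io) {
        client.threadPool().executor("sql_background_io").execute(io);
      }
    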
    +There are two thread pools which can be configured at cluster setup via `opensearch.yml`:: + + thread_pool: + sql-worker: + size: 30 + queue_size: 100 + sql_background_io: + size: 30 + queue_size: 1000 + +The ``sql-worker`` pool corresponds to compute resources related to running queries, such as compute-heavy evaluations on result sets. +This directly maps to the number of queries that can be run concurrently. +This is the primary pool you interact with externally. +``sql_background_io`` is a low-footprint pool for IO requests the plugin makes, +and can be used to limit indirect load that SQL places on your cluster for Calcite-enabled operations. +A ``sql-worker`` thread may spawn multiple background threads. + plugins.query.executionengine.spark.session.limit ================================================== diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/AsyncRestExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/AsyncRestExecutor.java index 2b16b584453..15580cf6477 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/AsyncRestExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/AsyncRestExecutor.java @@ -5,6 +5,8 @@ package org.opensearch.sql.legacy.executor; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; + import java.io.IOException; import java.time.Duration; import java.util.Map; @@ -30,10 +32,6 @@ /** A RestExecutor wrapper to execute request asynchronously to avoid blocking transport thread. */ public class AsyncRestExecutor implements RestExecutor { - - /** Custom thread pool name managed by OpenSearch */ - public static final String SQL_WORKER_THREAD_POOL_NAME = "sql-worker"; - private static final Logger LOG = LogManager.getLogger(AsyncRestExecutor.class); /** diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorAsyncRestExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorAsyncRestExecutor.java index 799aa55cf40..0a1e043b811 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorAsyncRestExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorAsyncRestExecutor.java @@ -5,6 +5,8 @@ package org.opensearch.sql.legacy.executor.cursor; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; + import java.io.IOException; import java.time.Duration; import java.util.Map; @@ -24,9 +26,6 @@ import org.opensearch.transport.client.Client; public class CursorAsyncRestExecutor { - /** Custom thread pool name managed by OpenSearch */ - public static final String SQL_WORKER_THREAD_POOL_NAME = "sql-worker"; - private static final Logger LOG = LogManager.getLogger(CursorAsyncRestExecutor.class); /** Delegated rest executor to async */ diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlStatsAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlStatsAction.java index 32b4d17ecda..bbf1d351850 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlStatsAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSqlStatsAction.java @@ -6,6 +6,7 @@ package org.opensearch.sql.legacy.plugin; import static org.opensearch.core.rest.RestStatus.INTERNAL_SERVER_ERROR; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; import com.google.common.collect.ImmutableList; 
    
import java.util.Arrays; @@ -90,7 +91,7 @@ protected Set responseParams() { private void schedule(NodeClient client, Runnable task) { ThreadPool threadPool = client.threadPool(); - threadPool.schedule(withCurrentContext(task), new TimeValue(0), "sql-worker"); + threadPool.schedule(withCurrentContext(task), new TimeValue(0), SQL_WORKER_THREAD_POOL_NAME); } private Runnable withCurrentContext(final Runnable task) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java index 0261bc98120..68350c5a0fd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java @@ -7,6 +7,7 @@ import java.util.List; import java.util.Map; +import java.util.Optional; import org.opensearch.action.search.CreatePitRequest; import org.opensearch.action.search.DeletePitRequest; import org.opensearch.sql.opensearch.mapping.IndexMapping; @@ -97,7 +98,7 @@ public interface OpenSearchClient { */ void schedule(Runnable task); - NodeClient getNodeClient(); + Optional getNodeClient(); /** * Create PIT for given indices diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java index 152fe499d2e..dab4b1e8ff1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java @@ -11,6 +11,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.ExecutionException; import java.util.function.Function; import java.util.function.Predicate; @@ -223,8 +224,8 @@ public void schedule(Runnable task) { } @Override - public NodeClient getNodeClient() { - return client; + public Optional getNodeClient() { + return Optional.of(client); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java index 87b171707bb..427eb7d6b03 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java @@ -13,6 +13,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; import lombok.RequiredArgsConstructor; @@ -236,8 +237,8 @@ public void schedule(Runnable task) { } @Override - public NodeClient getNodeClient() { - throw new UnsupportedOperationException("Unsupported method."); + public Optional getNodeClient() { + return Optional.empty(); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 6f0d4bf2f5a..07f0683acad 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -16,6 +16,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import 
java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; @@ -53,13 +54,13 @@ import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; import org.opensearch.sql.opensearch.client.OpenSearchClient; -import org.opensearch.sql.opensearch.client.OpenSearchNodeClient; import org.opensearch.sql.opensearch.executor.protector.ExecutionProtector; import org.opensearch.sql.opensearch.functions.DistinctCountApproxAggFunction; import org.opensearch.sql.opensearch.functions.GeoIpFunction; import org.opensearch.sql.opensearch.util.JdbcOpenSearchDataTypeConvertor; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.TableScanOperator; +import org.opensearch.transport.client.node.NodeClient; /** OpenSearch execution engine implementation. */ public class OpenSearchExecutionEngine implements ExecutionEngine { @@ -275,9 +276,10 @@ private void buildResultSet( /** Registers opensearch-dependent functions */ private void registerOpenSearchFunctions() { - if (client instanceof OpenSearchNodeClient) { + Optional nodeClient = client.getNodeClient(); + if (nodeClient.isPresent()) { SqlUserDefinedFunction geoIpFunction = - new GeoIpFunction(client.getNodeClient()).toUDF(BuiltinFunctionName.GEOIP.name()); + new GeoIpFunction(nodeClient.get()).toUDF(BuiltinFunctionName.GEOIP.name()); PPLFuncImpTable.INSTANCE.registerExternalOperator(BuiltinFunctionName.GEOIP, geoIpFunction); OperatorTable.addOperator(BuiltinFunctionName.GEOIP.name(), geoIpFunction); } else { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java index 76218d8295d..75cc5280214 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java @@ -21,7 +21,8 @@ public class OpenSearchQueryManager implements QueryManager { private final NodeClient nodeClient; - private static final String SQL_WORKER_THREAD_POOL_NAME = "sql-worker"; + public static final String SQL_WORKER_THREAD_POOL_NAME = "sql-worker"; + public static final String SQL_BACKGROUND_THREAD_POOL_NAME = "sql_background_io"; @Override public QueryId submit(AbstractPlan queryPlan) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index ddb27328bbb..ab9ec64d102 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -10,6 +10,7 @@ import java.util.LinkedHashMap; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; import lombok.Getter; @@ -46,6 +47,7 @@ import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.storage.read.TableScanBuilder; +import org.opensearch.transport.client.node.NodeClient; /** OpenSearch table (index) implementation. 
*/ public class OpenSearchIndex extends AbstractOpenSearchTable { @@ -243,27 +245,43 @@ public static class OpenSearchDefaultImplementor extends DefaultImplementor nc = client.getNodeClient(); + if (nc.isEmpty()) { + throw new UnsupportedOperationException( + "Unable to run Machine Learning operators on clients outside of the local node"); + } return new MLCommonsOperator( - visitChild(node, context), - node.getAlgorithm(), - node.getArguments(), - client.getNodeClient()); + visitChild(node, context), node.getAlgorithm(), node.getArguments(), nc.get()); } @Override public PhysicalPlan visitAD(LogicalAD node, OpenSearchIndexScan context) { - return new ADOperator(visitChild(node, context), node.getArguments(), client.getNodeClient()); + Optional nc = client.getNodeClient(); + if (nc.isEmpty()) { + throw new UnsupportedOperationException( + "Unable to run Anomaly Detector operators on clients outside of the local node"); + } + return new ADOperator(visitChild(node, context), node.getArguments(), nc.get()); } @Override public PhysicalPlan visitML(LogicalML node, OpenSearchIndexScan context) { - return new MLOperator(visitChild(node, context), node.getArguments(), client.getNodeClient()); + Optional nc = client.getNodeClient(); + if (nc.isEmpty()) { + throw new UnsupportedOperationException( + "Unable to run Machine Learning operators on clients outside of the local node"); + } + return new MLOperator(visitChild(node, context), node.getArguments(), nc.get()); } @Override public PhysicalPlan visitEval(LogicalEval node, OpenSearchIndexScan context) { - return new OpenSearchEvalOperator( - visitChild(node, context), node.getExpressions(), client.getNodeClient()); + Optional nc = client.getNodeClient(); + if (nc.isEmpty()) { + throw new UnsupportedOperationException( + "Unable to run Eval operators on clients outside of the local node"); + } + return new OpenSearchEvalOperator(visitChild(node, context), node.getExpressions(), nc.get()); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java new file mode 100644 index 00000000000..4019346e055 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java @@ -0,0 +1,177 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_BACKGROUND_THREAD_POOL_NAME; + +import java.util.Collections; +import java.util.Iterator; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import javax.annotation.Nullable; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.exception.NonFallbackCalciteException; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.response.OpenSearchResponse; + +/** + * Utility class for asynchronously scanning an index. This lets us send background requests to the + * index while we work on processing the previous batch. + * + *

    + * <h2>Lifecycle</h2>
    + *
    + * <p>The typical usage pattern is:
    + *
    + * <pre>
    + *   1. Create scanner: new BackgroundSearchScanner(client)
    + *   2. Start initial scan: startScanning(request)
    + *   3. Fetch batches in a loop: fetchNextBatch(request, maxWindow)
    + *   4. Close scanner when done: close()
    + * </pre>
    + *
    + * <h2>Async vs Sync Behavior</h2>
    + *
    + * <p>The scanner attempts to operate asynchronously when possible to improve performance:
    + *
    + * <ul>
    + *   <li>When async is available (client has thread pool access): the next batch is pre-fetched
    + *       while the current batch is being processed, reducing latency between batches
    + *   <li>When async is not available (client lacks thread pool access): falls back to synchronous
    + *       fetching; each batch is fetched only when needed
    + * </ul>
    + *
    + * <h2>Termination Conditions</h2>
    + *
    + * <p>Scanning will stop when any of these conditions are met (each sets stopIteration):
    + *
    + * <ul>
    + *   <li>An empty response is received
    + *   <li>The response is an aggregation or count response
    + *   <li>The response size is less than maxResultWindow
    + * </ul>
    
    + * + * Note: This class should be explicitly closed when no longer needed to ensure proper resource + * cleanup. + */ +public class BackgroundSearchScanner { + private final OpenSearchClient client; + @Nullable private final Executor backgroundExecutor; + private CompletableFuture nextBatchFuture = null; + private boolean stopIteration = false; + + public BackgroundSearchScanner(OpenSearchClient client) { + this.client = client; + // We can only actually do the background operation if we have the ability to access the thread + // pool. Otherwise, fallback to synchronous fetch. + if (client.getNodeClient().isPresent()) { + this.backgroundExecutor = + client.getNodeClient().get().threadPool().executor(SQL_BACKGROUND_THREAD_POOL_NAME); + } else { + this.backgroundExecutor = null; + } + } + + private boolean isAsync() { + return backgroundExecutor != null; + } + + /** + * @return Whether the search scanner has fetched all batches + */ + public boolean isScanDone() { + return stopIteration; + } + + /** + * Initiates the scanning process. If async operations are available, this will trigger the first + * background fetch. + * + * @param request The OpenSearch request to execute + */ + public void startScanning(OpenSearchRequest request) { + if (isAsync()) { + nextBatchFuture = + CompletableFuture.supplyAsync(() -> client.search(request), backgroundExecutor); + } + } + + private OpenSearchResponse getCurrentResponse(OpenSearchRequest request) { + if (isAsync()) { + try { + return nextBatchFuture.get(); + } catch (InterruptedException | ExecutionException e) { + throw new NonFallbackCalciteException( + "Failed to fetch data from the index: the background task failed or interrupted.\n" + + " Inner error: " + + e.getMessage()); + } + } else { + return client.search(request); + } + } + + /** + * Fetches the next batch of results. If async is enabled and more batches are expected, this will + * also trigger the next background fetch. + * + * @param request The OpenSearch request to execute + * @param maxResultWindow Maximum number of results to fetch per batch + * @return SearchBatchResult containing the current batch's iterator and completion status + * @throws NonFallbackCalciteException if the background fetch fails or is interrupted + */ + public SearchBatchResult fetchNextBatch(OpenSearchRequest request, int maxResultWindow) { + OpenSearchResponse response = getCurrentResponse(request); + + // Determine if we need future batches + if (response.isAggregationResponse() + || response.isCountResponse() + || response.getHitsSize() < maxResultWindow) { + stopIteration = true; + } + + Iterator iterator; + if (!response.isEmpty()) { + iterator = response.iterator(); + + // Pre-fetch next batch if needed + if (!stopIteration && isAsync()) { + nextBatchFuture = + CompletableFuture.supplyAsync(() -> client.search(request), backgroundExecutor); + } + } else { + iterator = Collections.emptyIterator(); + stopIteration = true; + } + + return new SearchBatchResult(iterator, stopIteration); + } + + /** + * Resets the scanner to its initial state, allowing a new scan to begin. This clears all + * completion flags and initiates a new background fetch if async is enabled. + * + * @param request The OpenSearch request to execute + */ + public void reset(OpenSearchRequest request) { + stopIteration = false; + startScanning(request); + } + + /** + * Releases resources associated with this scanner. Cancels any pending background fetches and + * marks the scan as complete. 
The scanner cannot be reused after closing without calling reset(). + */ + public void close() { + stopIteration = true; + if (nextBatchFuture != null) { + nextBatchFuture.cancel(true); + } + } + + public record SearchBatchResult(Iterator iterator, boolean stopIteration) {} +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index c4118a965da..e684d128914 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -17,7 +17,6 @@ import org.opensearch.sql.monitor.ResourceMonitor; import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.request.OpenSearchRequest; -import org.opensearch.sql.opensearch.response.OpenSearchResponse; /** * Supports a simple iteration over a collection for OpenSearch index @@ -31,6 +30,8 @@ public class OpenSearchIndexEnumerator implements Enumerator { /** OpenSearch client. */ private final OpenSearchClient client; + private final BackgroundSearchScanner bgScanner; + private final List fields; /** Search request. */ @@ -49,15 +50,12 @@ public class OpenSearchIndexEnumerator implements Enumerator { private final ResourceMonitor monitor; /** Number of rows returned. */ - private Integer queryCount; + private Integer queryCount = 0; /** Search response for current batch. */ private Iterator iterator; - private ExprValue current; - - /** flag to indicate whether fetch more than one batch */ - private boolean fetchOnce = false; + private ExprValue current = null; public OpenSearchIndexEnumerator( OpenSearchClient client, @@ -66,33 +64,24 @@ public OpenSearchIndexEnumerator( int maxResultWindow, OpenSearchRequest request, ResourceMonitor monitor) { - this.client = client; + if (!monitor.isHealthy()) { + throw new NonFallbackCalciteException("insufficient resources to run the query, quit."); + } + this.fields = fields; this.request = request; this.maxResponseSize = maxResponseSize; this.maxResultWindow = maxResultWindow; this.monitor = monitor; - this.queryCount = 0; - this.current = null; - if (!this.monitor.isHealthy()) { - throw new NonFallbackCalciteException("insufficient resources to run the query, quit."); - } + this.client = client; + this.bgScanner = new BackgroundSearchScanner(client); + this.bgScanner.startScanning(request); } - private void fetchNextBatch() { - OpenSearchResponse response = client.search(request); - if (response.isAggregationResponse() - || response.isCountResponse() - || response.getHitsSize() < maxResultWindow) { - // no need to fetch next batch if it's for an aggregation - // or the length of response hits is less than max result window size. 
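    The enumerator below now delegates batch management to this scanner. A minimal usage sketch of the documented lifecycle (editorial, not part of the patch), assuming an existing OpenSearchClient and OpenSearchRequest, with consume standing in as a hypothetical row handler::
    
      import java.util.function.Consumer;
      import org.opensearch.sql.data.model.ExprValue;
      import org.opensearch.sql.opensearch.client.OpenSearchClient;
      import org.opensearch.sql.opensearch.request.OpenSearchRequest;
    
      // Drain an index scan while overlapping row processing with background I/O.
      static void scanAll(
          OpenSearchClient client, OpenSearchRequest request, int maxResultWindow,
          Consumer<ExprValue> consume) {
        BackgroundSearchScanner scanner = new BackgroundSearchScanner(client);
        scanner.startScanning(request); // first fetch, in the background when possible
        try {
          while (!scanner.isScanDone()) {
            // Returns the current batch and, when async, pre-fetches the next one.
            BackgroundSearchScanner.SearchBatchResult batch =
                scanner.fetchNextBatch(request, maxResultWindow);
            batch.iterator().forEachRemaining(consume); // process while the next batch loads
          }
        } finally {
          scanner.close(); // cancels any still-pending background fetch
        }
      }
    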
- fetchOnce = true; - } - if (!response.isEmpty()) { - iterator = response.iterator(); - } else if (iterator == null) { - iterator = Collections.emptyIterator(); - } + private Iterator fetchNextBatch() { + BackgroundSearchScanner.SearchBatchResult result = + bgScanner.fetchNextBatch(request, maxResultWindow); + return result.iterator(); } @Override @@ -121,8 +110,8 @@ public boolean moveNext() { throw new NonFallbackCalciteException("insufficient resources to load next row, quit."); } - if (iterator == null || (!iterator.hasNext() && !fetchOnce)) { - fetchNextBatch(); + if (iterator == null || (!iterator.hasNext() && !this.bgScanner.isScanDone())) { + iterator = fetchNextBatch(); } if (iterator.hasNext()) { current = iterator.next(); @@ -135,18 +124,16 @@ public boolean moveNext() { @Override public void reset() { - OpenSearchResponse response = client.search(request); - if (!response.isEmpty()) { - iterator = response.iterator(); - } else { - iterator = Collections.emptyIterator(); - } + bgScanner.reset(request); + iterator = bgScanner.fetchNextBatch(request, maxResultWindow).iterator(); queryCount = 0; } @Override public void close() { iterator = Collections.emptyIterator(); + queryCount = 0; + bgScanner.close(); if (request != null) { client.forceCleanup(request); request = null; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index 88a70c08b94..f93da87c303 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -29,6 +29,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; import lombok.SneakyThrows; import org.apache.commons.lang3.reflect.FieldUtils; @@ -527,7 +528,7 @@ void meta_with_IOException() throws IOException { @Test void ml_with_exception() { - assertThrows(UnsupportedOperationException.class, () -> client.getNodeClient()); + assertEquals(Optional.empty(), client.getNodeClient()); } private Map mockFieldMappings(String indexName, String mappings) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchDefaultImplementorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchDefaultImplementorTest.java index 85d0a4e94fa..1977a03a1be 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchDefaultImplementorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchDefaultImplementorTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; +import java.util.Optional; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Answers; @@ -18,6 +19,7 @@ import org.opensearch.sql.planner.logical.LogicalML; import org.opensearch.sql.planner.logical.LogicalMLCommons; import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.transport.client.node.NodeClient; @ExtendWith(MockitoExtension.class) public class OpenSearchDefaultImplementorTest { @@ -27,7 +29,8 @@ public class OpenSearchDefaultImplementorTest { @Test public void visitMachineLearning() { LogicalMLCommons node = Mockito.mock(LogicalMLCommons.class, Answers.RETURNS_DEEP_STUBS); - 
Mockito.when(node.getChild().get(0)).thenReturn(Mockito.mock(LogicalPlan.class)); + Mockito.when(node.getChild().getFirst()).thenReturn(Mockito.mock(LogicalPlan.class)); + Mockito.when(client.getNodeClient()).thenReturn(Optional.of(Mockito.mock(NodeClient.class))); OpenSearchIndex.OpenSearchDefaultImplementor implementor = new OpenSearchIndex.OpenSearchDefaultImplementor(client); assertNotNull(implementor.visitMLCommons(node, null)); @@ -36,7 +39,8 @@ public void visitMachineLearning() { @Test public void visitAD() { LogicalAD node = Mockito.mock(LogicalAD.class, Answers.RETURNS_DEEP_STUBS); - Mockito.when(node.getChild().get(0)).thenReturn(Mockito.mock(LogicalPlan.class)); + Mockito.when(node.getChild().getFirst()).thenReturn(Mockito.mock(LogicalPlan.class)); + Mockito.when(client.getNodeClient()).thenReturn(Optional.of(Mockito.mock(NodeClient.class))); OpenSearchIndex.OpenSearchDefaultImplementor implementor = new OpenSearchIndex.OpenSearchDefaultImplementor(client); assertNotNull(implementor.visitAD(node, null)); @@ -45,7 +49,8 @@ public void visitAD() { @Test public void visitML() { LogicalML node = Mockito.mock(LogicalML.class, Answers.RETURNS_DEEP_STUBS); - Mockito.when(node.getChild().get(0)).thenReturn(Mockito.mock(LogicalPlan.class)); + Mockito.when(node.getChild().getFirst()).thenReturn(Mockito.mock(LogicalPlan.class)); + Mockito.when(client.getNodeClient()).thenReturn(Optional.of(Mockito.mock(NodeClient.class))); OpenSearchIndex.OpenSearchDefaultImplementor implementor = new OpenSearchIndex.OpenSearchDefaultImplementor(client); assertNotNull(implementor.visitML(node, null)); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java index 6a4713dc917..7bba55955b2 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java @@ -31,6 +31,7 @@ import com.google.common.collect.ImmutableMap; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -38,6 +39,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.common.unit.TimeValue; import org.opensearch.sql.ast.tree.Sort; @@ -58,6 +60,7 @@ import org.opensearch.sql.planner.logical.LogicalPlan; import org.opensearch.sql.planner.logical.LogicalPlanDSL; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; +import org.opensearch.transport.client.node.NodeClient; @ExtendWith(MockitoExtension.class) class OpenSearchIndexTest { @@ -225,6 +228,7 @@ void implementRelationOperatorWithOptimization() { @Test void implementOtherLogicalOperators() { when(client.getIndexMaxResultWindows("test")).thenReturn(Map.of("test", 10000)); + when(client.getNodeClient()).thenReturn(Optional.of(Mockito.mock(NodeClient.class))); NamedExpression include = named("age", ref("age", INTEGER)); ReferenceExpression exclude = ref("name", STRING); ReferenceExpression dedupeField = ref("name", STRING); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java new file mode 100644 index 00000000000..f4a7f297df9 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java @@ -0,0 +1,150 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.response.OpenSearchResponse; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.client.node.NodeClient; + +class BackgroundSearchScannerTest { + private OpenSearchClient client; + private NodeClient nodeClient; + private ThreadPool threadPool; + private OpenSearchRequest request; + private BackgroundSearchScanner scanner; + private ExecutorService executor; + + @BeforeEach + void setUp() { + client = mock(OpenSearchClient.class); + nodeClient = mock(NodeClient.class); + threadPool = mock(ThreadPool.class); + request = mock(OpenSearchRequest.class); + executor = Executors.newSingleThreadExecutor(); + + when(client.getNodeClient()).thenReturn(Optional.of(nodeClient)); + when(nodeClient.threadPool()).thenReturn(threadPool); + when(threadPool.executor(any())).thenReturn(executor); + + scanner = new BackgroundSearchScanner(client); + } + + @Test + void testSyncFallbackWhenNoNodeClient() { + // Setup client without node client + OpenSearchClient syncClient = mock(OpenSearchClient.class); + when(syncClient.getNodeClient()).thenReturn(Optional.empty()); + scanner = new BackgroundSearchScanner(syncClient); + + OpenSearchResponse response = mockResponse(false, false, 10); + when(syncClient.search(request)).thenReturn(response); + + scanner.startScanning(request); + BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request, 10); + + assertFalse( + result.stopIteration(), "Expected iteration to continue after fetching one full page"); + verify(syncClient, times(1)).search(request); + } + + @Test + void testCompleteScanWithMultipleBatches() { + // First batch: normal response + OpenSearchResponse response1 = mockResponse(false, false, 10); + // Second batch: empty response + OpenSearchResponse response2 = mockResponse(true, false, 5); + + when(client.search(request)).thenReturn(response1).thenReturn(response2); + + scanner.startScanning(request); + + // First batch + BackgroundSearchScanner.SearchBatchResult result1 = scanner.fetchNextBatch(request, 10); + assertFalse( + result1.stopIteration(), "Expected iteration to continue after fetching 10/15 results"); + assertTrue(result1.iterator().hasNext()); + + // Second batch + BackgroundSearchScanner.SearchBatchResult result2 = scanner.fetchNextBatch(request, 10); + 
assertTrue(result2.stopIteration()); + assertFalse(result2.iterator().hasNext()); + } + + @Test + void testFetchOnceForAggregationResponse() { + OpenSearchResponse response = mockResponse(false, true, 1); + when(client.search(request)).thenReturn(response); + + scanner.startScanning(request); + BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request, 10); + + assertTrue(scanner.isScanDone()); + } + + @Test + void testFetchOnceWhenResultsBelowWindow() { + OpenSearchResponse response = mockResponse(false, false, 5); + when(client.search(request)).thenReturn(response); + + scanner.startScanning(request); + BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request, 10); + + assertTrue(scanner.isScanDone()); + } + + @Test + void testReset() { + OpenSearchResponse response1 = mockResponse(false, false, 5); + OpenSearchResponse response2 = mockResponse(true, false, 0); + + when(client.search(request)).thenReturn(response1).thenReturn(response2); + + scanner.startScanning(request); + scanner.fetchNextBatch(request, 10); + scanner.fetchNextBatch(request, 10); + + assertTrue(scanner.isScanDone()); + + scanner.reset(request); + + assertFalse(scanner.isScanDone()); + } + + private OpenSearchResponse mockResponse(boolean isEmpty, boolean isAggregation, int numResults) { + OpenSearchResponse response = mock(OpenSearchResponse.class); + when(response.isEmpty()).thenReturn(isEmpty); + when(response.isAggregationResponse()).thenReturn(isAggregation); + + if (numResults > 0) { + ExprValue[] values = new ExprValue[numResults]; + Arrays.fill(values, mock(ExprValue.class)); + when(response.iterator()).thenReturn(Arrays.asList(values).iterator()); + } else { + when(response.iterator()).thenReturn(Collections.emptyIterator()); + } + + when(response.getHitsSize()).thenReturn(numResults); + return response; + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java index efd7a39d3c5..d817e13c69f 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java @@ -5,8 +5,9 @@ package org.opensearch.sql.plugin; -import static java.util.Collections.singletonList; import static org.opensearch.sql.datasource.model.DataSourceMetadata.defaultOpenSearchDataSourceMetadata; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_BACKGROUND_THREAD_POOL_NAME; +import static org.opensearch.sql.opensearch.executor.OpenSearchQueryManager.SQL_WORKER_THREAD_POOL_NAME; import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_REQUEST_BUFFER_INDEX_NAME; import com.google.common.collect.ImmutableList; @@ -85,7 +86,6 @@ import org.opensearch.sql.directquery.transport.model.ReadDirectQueryResourcesActionResponse; import org.opensearch.sql.directquery.transport.model.WriteDirectQueryResourcesActionResponse; import org.opensearch.sql.legacy.esdomain.LocalClusterState; -import org.opensearch.sql.legacy.executor.AsyncRestExecutor; import org.opensearch.sql.legacy.metrics.Metrics; import org.opensearch.sql.legacy.plugin.RestSqlAction; import org.opensearch.sql.legacy.plugin.RestSqlStatsAction; @@ -315,13 +315,24 @@ public ScheduledJobParser getJobParser() { @Override public List> getExecutorBuilders(Settings settings) { - return singletonList( + // The worker pool is the primary pool where most of the work is done. 
The background thread + // pool is a separate queue for asynchronous requests to other nodes. We keep them separate to + // prevent deadlocks during async fetches on small node counts. Tasks in the background pool + // should do no work except I/O to other services. + return List.of( new FixedExecutorBuilder( settings, - AsyncRestExecutor.SQL_WORKER_THREAD_POOL_NAME, + SQL_WORKER_THREAD_POOL_NAME, OpenSearchExecutors.allocatedProcessors(settings), 1000, - null)); + "thread_pool." + SQL_WORKER_THREAD_POOL_NAME), + new FixedExecutorBuilder( + settings, + SQL_BACKGROUND_THREAD_POOL_NAME, + settings.getAsInt( + "thread_pool.search.size", OpenSearchExecutors.allocatedProcessors(settings)), + 1000, + "thread_pool." + SQL_BACKGROUND_THREAD_POOL_NAME)); } @Override From b69d8dee0b07ae27ce9c790ed45cd4413fbf0fbc Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 2 Dec 2025 08:45:36 +0800 Subject: [PATCH 87/99] Add hashCode() and equals() to the implmentation classes of ExprJavaType (#4885) --- .../sql/calcite/type/ExprJavaType.java | 4 +- .../sql/data/model/ExprIpValue.java | 2 + .../rest-api-spec/test/issues/4726.yml | 83 +++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4726.yml diff --git a/core/src/main/java/org/opensearch/sql/calcite/type/ExprJavaType.java b/core/src/main/java/org/opensearch/sql/calcite/type/ExprJavaType.java index cbda5974d3c..d9b33902d10 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/type/ExprJavaType.java +++ b/core/src/main/java/org/opensearch/sql/calcite/type/ExprJavaType.java @@ -14,7 +14,9 @@ /** * The JavaType for ExprUDT. The UDT which needs to use self-implemented java class should extend - * this. + * this. Its javaClazz should override equals() and hashCode() methods. For example, {@link + * org.opensearch.sql.data.model.ExprIpValue} (javaClazz of {@link ExprIPType}) overrides the + * equals() and hashCode(). */ public class ExprJavaType extends AbstractExprRelDataType { public ExprJavaType(OpenSearchTypeFactory typeFactory, ExprUDT exprUDT, Class javaClazz) { diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java index 7723ee8c689..f951db9cf56 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java @@ -6,11 +6,13 @@ package org.opensearch.sql.data.model; import inet.ipaddr.IPAddress; +import lombok.EqualsAndHashCode; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.utils.IPUtils; /** Expression IP Address Value. 
*/ +@EqualsAndHashCode(callSuper = false) public class ExprIpValue extends AbstractExprValue { private final IPAddress value; diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4726.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4726.yml new file mode 100644 index 00000000000..4e267493da3 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4726.yml @@ -0,0 +1,83 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + - do: + indices.create: + index: test1 + body: + mappings: + properties: + "timestamp": + type: date + "status": + type: integer + "client_ip": + type: ip + - do: + indices.create: + index: test2 + body: + mappings: + properties: + "client_ip": + type: ip + "city": + type: keyword + "country": + type: keyword + - do: + bulk: + index: test1 + refresh: true + body: + - '{"index":{}}' + - '{"datetime":"2025-01-15T00:30:00Z","status":200,"client_ip":"10.0.0.1"}' + - '{"index":{}}' + - '{"datetime":"2025-01-15T02:15:00Z","status":200,"client_ip":"10.0.0.2"}' + - '{"index":{}}' + - '{"datetime":"2025-01-15T10:50:00Z","status":200,"client_ip":"10.0.0.11"}' + - '{"index":{}}' + - '{"datetime":"2025-01-15T23:45:00Z","status":200,"client_ip":"10.0.0.24"}' + - do: + bulk: + index: test2 + refresh: true + body: + - '{"index":{}}' + - '{"client_ip": "10.0.0.1","country": "Canada","city": "Toronto"}' + - '{"index":{}}' + - '{"client_ip": "10.0.0.24","country": "UK","city": "London"}' + - '{"index":{}}' + - '{"client_ip": "10.0.1.1","country": "USA","city": "New York"}' + - '{"index":{}}' + - '{"client_ip": "10.0.1.2","country": "USA","city": "Seattle"}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"hash join on IP type should work": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test1 | stats count() as cnt by client_ip | join type=inner client_ip test2 | fields client_ip, cnt + + + - match: { total: 2 } + - match: {"datarows": [["10.0.0.1", 1], ["10.0.0.24", 1]]} From 2682804cbe593e09b7b1a2675ac4a24b486542bf Mon Sep 17 00:00:00 2001 From: Tomoyuki MORITA Date: Tue, 2 Dec 2025 09:27:17 -0800 Subject: [PATCH 88/99] Add config for CodeRabbit review (#4890) * Add review instructions for CodeRabbit Signed-off-by: Tomoyuki Morita * Disable auto review Signed-off-by: Tomoyuki Morita --------- Signed-off-by: Tomoyuki Morita --- .coderabbit.yaml | 97 +++++++++++++++++++++++++++++++++++++ .rules/REVIEW_GUIDELINES.md | 88 +++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 .coderabbit.yaml create mode 100644 .rules/REVIEW_GUIDELINES.md diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 00000000000..11653b509a3 --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,97 @@ +# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json + +# CodeRabbit Configuration for OpenSearch SQL Project +# This configuration uses .rules/REVIEW_GUIDELINES.md for code review standards + +language: "en-US" +early_access: false + +reviews: + profile: "chill" + request_changes_workflow: false + high_level_summary: true + high_level_summary_placeholder: "@coderabbitai summary" + poem: false # Keep reviews professional and concise + review_status: true + collapse_walkthrough: false + + auto_review: + enabled: false # Disabled auto-review until it becomes stable + auto_incremental_review: false + drafts: false # Don't review draft PRs + ignore_title_keywords: + - "WIP" + - "DO NOT MERGE" + - "DRAFT" + + # Path-specific review instructions + path_instructions: + - path: "**/*.java" + instructions: | + - Verify Java naming conventions (PascalCase for classes, camelCase for methods/variables) + - Check for proper JavaDoc on public classes and methods + - Flag redundant comments that restate obvious code + - Ensure methods are under 20 lines with single responsibility + - Verify proper error handling with specific exception types + - Check for Optional usage instead of null returns + - Validate proper use of try-with-resources for resource management + + - path: "**/test/**/*.java" + instructions: | + - Verify test coverage for new business logic + - Check test naming follows conventions (*Test.java for unit, *IT.java for integration) + - Ensure tests are independent and don't rely on execution order + - Validate meaningful test data that reflects real-world scenarios + - Check for proper cleanup of test resources + + - path: "integ-test/**/*IT.java" + instructions: | + - Verify integration tests are in correct module (integ-test/) + - Check tests can be run with ./gradlew :integ-test:integTest + - Ensure proper test data setup and teardown + - Validate end-to-end scenario coverage + + - path: "**/ppl/**/*.java" + instructions: | + - For PPL parser changes, verify grammar tests with positive/negative cases + - Check AST generation for new syntax + - Ensure corresponding AST builder classes are updated + - Validate edge cases and boundary conditions + + - path: "**/calcite/**/*.java" + instructions: | + - Follow existing patterns in CalciteRelNodeVisitor and 
CalciteRexNodeVisitor + - Verify SQL generation and optimization paths + - Document any Calcite-specific workarounds + - Test compatibility with Calcite version constraints + +chat: + auto_reply: true + +# Knowledge base configuration +knowledge_base: + # Don't opt out - use knowledge base features + opt_out: false + + # Code guidelines - reference our custom review guidelines + code_guidelines: + enabled: true + filePatterns: + # Reference our custom review guidelines + - ".rules/REVIEW_GUIDELINES.md" + + # Enable web search for additional context + web_search: + enabled: true + + # Use repository-specific learnings for this project + learnings: + scope: "local" + + # Use repository-specific issues + issues: + scope: "local" + + # Use repository-specific pull requests for context + pull_requests: + scope: "local" diff --git a/.rules/REVIEW_GUIDELINES.md b/.rules/REVIEW_GUIDELINES.md new file mode 100644 index 00000000000..8ffb24c7bd5 --- /dev/null +++ b/.rules/REVIEW_GUIDELINES.md @@ -0,0 +1,88 @@ +# Code Review Guidelines for OpenSearch SQL + +This document provides guidelines for code reviews in the OpenSearch SQL project. These guidelines are used by CodeRabbit AI for automated code reviews and serve as a reference for human reviewers. + +## Core Review Principles + +### Code Quality +- **Simplicity First**: Prefer simpler solutions unless there's significant functional or performance degradation +- **Self-Documenting Code**: Code should be clear through naming and structure, not comments +- **No Redundant Comments**: Avoid comments that merely restate what the code does +- **Concise Implementation**: Keep code, docs, and notes short and focused on essentials + +### Java Standards +- **Naming Conventions**: + - Classes: `PascalCase` (e.g., `QueryExecutor`) + - Methods/Variables: `camelCase` (e.g., `executeQuery`) + - Constants: `UPPER_SNAKE_CASE` (e.g., `MAX_RETRY_COUNT`) +- **Method Size**: Keep methods under 20 lines with single responsibility +- **JavaDoc Required**: All public classes and methods must have proper JavaDoc +- **Error Handling**: Use specific exception types with meaningful messages +- **Null Safety**: Prefer `Optional` for nullable returns + +### Testing Requirements +- **Test Coverage**: All new business logic requires unit tests +- **Integration Tests**: End-to-end scenarios need integration tests in `integ-test/` module +- **Test Execution**: Verify changes with `./gradlew :integ-test:integTest` +- **No Failing Tests**: All tests must pass before merge; fix or ask for guidance if blocked + +### Code Organization +- **Single Responsibility**: Each class should have one clear purpose +- **Package Structure**: Follow existing module organization (core, ppl, sql, opensearch) +- **Separation of Concerns**: Keep parsing, execution, and storage logic separate +- **Composition Over Inheritance**: Prefer composition for code reuse + +### Performance & Security +- **Efficient Loops**: Avoid unnecessary object creation in loops +- **String Handling**: Use `StringBuilder` for concatenation in loops +- **Input Validation**: Validate all user inputs, especially queries +- **Logging Safety**: Sanitize data before logging to prevent injection +- **Resource Management**: Use try-with-resources for proper cleanup + +## Review Focus Areas + +### What to Check +1. **Code Clarity**: Is the code self-explanatory? +2. **Test Coverage**: Are there adequate tests? +3. **Error Handling**: Are errors handled appropriately? +4. **Documentation**: Is JavaDoc complete and accurate? +5. 
**Performance**: Are there obvious performance issues? +6. **Security**: Are inputs validated and sanitized? + +### What to Flag +- Redundant or obvious comments +- Methods longer than 20 lines +- Missing JavaDoc on public APIs +- Generic exception handling +- Unused imports or dead code +- Hard-coded values that should be constants +- Missing or inadequate test coverage + +### What to Encourage +- Clear, descriptive naming +- Proper use of Java idioms +- Comprehensive test coverage +- Meaningful error messages +- Efficient algorithms and data structures +- Security-conscious coding practices + +## Project-Specific Guidelines + +### OpenSearch SQL Context +- **JDK 21**: Required for development +- **Java 11 Compatibility**: Maintain when possible for OpenSearch 2.x +- **Module Structure**: Respect existing module boundaries +- **Integration Tests**: Use `./gradlew :integ-test:integTest` for testing +- **Test Naming**: `*IT.java` for integration tests, `*Test.java` for unit tests + +### PPL Parser Changes +- Test new grammar rules with positive and negative cases +- Verify AST generation for new syntax +- Include edge cases and boundary conditions +- Update corresponding AST builder classes + +### Calcite Integration +- If the PR is for PPL command, refer docs/dev/ppl-commands.md and verify the PR satisfy the checklist. +- Follow existing patterns in `CalciteRelNodeVisitor` and `CalciteRexNodeVisitor` +- Test SQL generation and optimization paths +- Document Calcite-specific workarounds From a85aead0041b1de18627b86c53bc193f6374292b Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Tue, 2 Dec 2025 11:46:42 -0800 Subject: [PATCH 89/99] Support timeouts for Calcite queries (#4857) --- .../sql/common/setting/Settings.java | 1 + docs/user/ppl/admin/settings.rst | 36 ++++++++ .../executor/OpenSearchQueryManager.java | 52 +++++++++++- .../planner/rules/AggregateIndexScanRule.java | 5 +- .../planner/rules/DedupPushdownRule.java | 6 +- .../ExpandCollationOnProjectExprRule.java | 5 +- .../planner/rules/FilterIndexScanRule.java | 5 +- .../planner/rules/InterruptibleRelRule.java | 83 +++++++++++++++++++ .../planner/rules/LimitIndexScanRule.java | 5 +- .../planner/rules/ProjectIndexScanRule.java | 5 +- .../planner/rules/RareTopPushdownRule.java | 5 +- .../rules/RelevanceFunctionPushdownRule.java | 6 +- .../rules/SortAggregateMeasureRule.java | 6 +- .../planner/rules/SortExprIndexScanRule.java | 8 +- .../planner/rules/SortIndexScanRule.java | 5 +- .../rules/SortProjectExprTransposeRule.java | 6 +- .../setting/OpenSearchSettings.java | 14 ++++ .../storage/scan/OpenSearchIndexScan.java | 11 +++ .../executor/OpenSearchQueryManagerTest.java | 13 ++- .../plugin/config/OpenSearchPluginModule.java | 4 +- 20 files changed, 237 insertions(+), 44 deletions(-) create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/InterruptibleRelRule.java diff --git a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java index 7613f5e3e17..96fe2e04eea 100644 --- a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java +++ b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java @@ -25,6 +25,7 @@ public enum Key { /** PPL Settings. 
*/ PPL_ENABLED("plugins.ppl.enabled"), + PPL_QUERY_TIMEOUT("plugins.ppl.query.timeout"), PATTERN_METHOD("plugins.ppl.pattern.method"), PATTERN_MODE("plugins.ppl.pattern.mode"), PATTERN_MAX_SAMPLE_COUNT("plugins.ppl.pattern.max.sample.count"), diff --git a/docs/user/ppl/admin/settings.rst b/docs/user/ppl/admin/settings.rst index d99cdc6c2d0..ef9eba207fa 100644 --- a/docs/user/ppl/admin/settings.rst +++ b/docs/user/ppl/admin/settings.rst @@ -73,6 +73,42 @@ PPL query:: "status": 400 } +plugins.ppl.query.timeout +========================= + +Description +----------- + +This setting controls the maximum execution time for PPL queries. When a query exceeds this timeout, it will be interrupted and return a timeout error. + +1. The default value is 300s (5 minutes). +2. This setting is node scope. +3. This setting can be updated dynamically. + +Example +------- + +You can configure the query timeout: + +PPL query:: + + sh$ curl -sS -H 'Content-Type: application/json' \ + ... -X PUT localhost:9200/_plugins/_query/settings \ + ... -d '{"transient" : {"plugins.ppl.query.timeout" : "60s"}}' + { + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "ppl": { + "query": { + "timeout": "60s" + } + } + } + } + } + plugins.query.memory_limit ========================== diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java index 75cc5280214..dacb7f97eab 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManager.java @@ -7,11 +7,16 @@ import java.util.Map; import lombok.RequiredArgsConstructor; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.ThreadContext; +import org.opensearch.OpenSearchTimeoutException; import org.opensearch.common.unit.TimeValue; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.executor.QueryId; import org.opensearch.sql.executor.QueryManager; import org.opensearch.sql.executor.execution.AbstractPlan; +import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.client.node.NodeClient; @@ -19,21 +24,62 @@ @RequiredArgsConstructor public class OpenSearchQueryManager implements QueryManager { + private static final Logger LOG = LogManager.getLogger(OpenSearchQueryManager.class); + private final NodeClient nodeClient; + private final Settings settings; + public static final String SQL_WORKER_THREAD_POOL_NAME = "sql-worker"; public static final String SQL_BACKGROUND_THREAD_POOL_NAME = "sql_background_io"; @Override public QueryId submit(AbstractPlan queryPlan) { - schedule(nodeClient, () -> queryPlan.execute()); + TimeValue timeout = settings.getSettingValue(Settings.Key.PPL_QUERY_TIMEOUT); + schedule(nodeClient, queryPlan::execute, timeout); return queryPlan.getQueryId(); } - private void schedule(NodeClient client, Runnable task) { + private void schedule(NodeClient client, Runnable task, TimeValue timeout) { ThreadPool threadPool = client.threadPool(); - threadPool.schedule(withCurrentContext(task), new TimeValue(0), SQL_WORKER_THREAD_POOL_NAME); + + Runnable wrappedTask = + withCurrentContext( + () -> { + final Thread executionThread = Thread.currentThread(); + + Scheduler.ScheduledCancellable timeoutTask = + threadPool.schedule( + 
() -> { + LOG.warn( + "Query execution timed out after {}. Interrupting execution thread.", + timeout); + executionThread.interrupt(); + }, + timeout, + ThreadPool.Names.GENERIC); + + try { + task.run(); + timeoutTask.cancel(); + // Clear any leftover thread interrupts to keep the thread pool clean + Thread.interrupted(); + } catch (Exception e) { + timeoutTask.cancel(); + + // Special-case handling of timeout-related interruptions + if (Thread.interrupted() || e.getCause() instanceof InterruptedException) { + LOG.error("Query was interrupted due to timeout after {}", timeout); + throw new OpenSearchTimeoutException( + "Query execution timed out after " + timeout); + } + + throw e; + } + }); + + threadPool.schedule(wrappedTask, new TimeValue(0), SQL_WORKER_THREAD_POOL_NAME); } private Runnable withCurrentContext(final Runnable task) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java index 0f6c654df79..5c919a0e0cf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/AggregateIndexScanRule.java @@ -13,7 +13,6 @@ import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; @@ -40,7 +39,7 @@ /** Planner rule that push a {@link LogicalAggregate} down to {@link CalciteLogicalIndexScan} */ @Value.Enclosing -public class AggregateIndexScanRule extends RelRule { +public class AggregateIndexScanRule extends InterruptibleRelRule { /** Creates a AggregateIndexScanRule. 
*/ protected AggregateIndexScanRule(Config config) { @@ -48,7 +47,7 @@ protected AggregateIndexScanRule(Config config) { } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { if (call.rels.length == 5) { final LogicalAggregate aggregate = call.rel(0); final LogicalProject topProject = call.rel(1); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index 068900d3f18..70e446fa4bc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -9,7 +9,6 @@ import java.util.List; import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; @@ -31,7 +30,7 @@ import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @Value.Enclosing -public class DedupPushdownRule extends RelRule { +public class DedupPushdownRule extends InterruptibleRelRule { private static final Logger LOG = LogManager.getLogger(); protected DedupPushdownRule(Config config) { @@ -39,8 +38,9 @@ protected DedupPushdownRule(Config config) { } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final LogicalProject finalProject = call.rel(0); + // TODO Used when number of duplication is more than 1 final LogicalFilter numOfDedupFilter = call.rel(1); final LogicalProject projectWithWindow = call.rel(2); if (call.rels.length == 5) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java index 2034eb1c6d8..0a8b3ae5f33 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ExpandCollationOnProjectExprRule.java @@ -11,7 +11,6 @@ import java.util.function.Predicate; import org.apache.calcite.adapter.enumerable.EnumerableProject; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.plan.RelTrait; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.plan.volcano.AbstractConverter; @@ -44,14 +43,14 @@ */ @Value.Enclosing public class ExpandCollationOnProjectExprRule - extends RelRule { + extends InterruptibleRelRule { protected ExpandCollationOnProjectExprRule(Config config) { super(config); } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final AbstractConverter converter = call.rel(0); final Project project = call.rel(1); final RelTraitSet toTraits = converter.getTraitSet(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java index b0c4f55aa3d..b35c74ac829 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/FilterIndexScanRule.java @@ -7,7 +7,6 @@ import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.logical.LogicalFilter; @@ -18,7 +17,7 @@ /** Planner rule that push a {@link LogicalFilter} down to {@link CalciteLogicalIndexScan} */ @Value.Enclosing -public class FilterIndexScanRule extends RelRule { +public class FilterIndexScanRule extends InterruptibleRelRule { /** Creates a FilterIndexScanRule. */ protected FilterIndexScanRule(Config config) { @@ -26,7 +25,7 @@ protected FilterIndexScanRule(Config config) { } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { if (call.rels.length == 2) { // the ordinary variant final LogicalFilter filter = call.rel(0); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/InterruptibleRelRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/InterruptibleRelRule.java new file mode 100644 index 00000000000..59e94a4757c --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/InterruptibleRelRule.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.planner.rules; + +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelRule; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; + +/** + * Base class for OpenSearch planner rules that automatically checks for thread interruption during + * query planning. This ensures that long-running planning operations can be interrupted when a + * query timeout occurs. + * + *

<p>All OpenSearch planner rules should extend this class instead of extending {@link RelRule} + * directly. This provides automatic timeout support without requiring manual interruption checks in + * each rule. + * + * <p>Example usage: + * + * <pre>{@code
    + * public class MyCustomRule extends InterruptibleRelRule<MyCustomRule.Config> {
    + *   protected MyCustomRule(Config config) {
    + *     super(config);
    + *   }
    + *
    + *   @Override
    + *   protected void onMatchImpl(RelOptRuleCall call) {
    + *     // Rule implementation - interruption is checked automatically
    + *     // before this method is called
    + *   }
    + * }
    + * }</pre>
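    + *
    + * <p>A matching {@code Config} would typically follow the immutables pattern used by the other
    + * rules in this package. The following is a hypothetical sketch only ({@code MyCustomRule} and
    + * its builder defaults are illustrative, not part of this patch):
    + *
    + * <pre>{@code
    + * @Value.Immutable
    + * public interface Config extends OpenSearchRuleConfig {
    + *   Config DEFAULT = ImmutableMyCustomRule.Config.builder().build();
    + *
    + *   @Override
    + *   default MyCustomRule toRule() {
    + *     return new MyCustomRule(this);
    + *   }
    + * }
    + * }</pre>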
    + * + * @param <C> the configuration type for this rule + */ +public abstract class InterruptibleRelRule<C extends OpenSearchRuleConfig> extends RelRule<C> { + + /** + * Constructs an InterruptibleRelRule with the given configuration. + * + * @param config the rule configuration + */ + protected InterruptibleRelRule(C config) { + super(config); + } + + /** + * Called when the rule matches. This method checks for thread interruption before delegating to + * the implementation-specific {@link #onMatchImpl(RelOptRuleCall)} method. + * + * <p>Do not override this method in subclasses. Instead, override {@link + * #onMatchImpl(RelOptRuleCall)}. + * + * @param call the rule call context + * @throws OpenSearchTimeoutException wrapping {@link InterruptedException} if the thread has been + * interrupted + */ + @Override + public final void onMatch(RelOptRuleCall call) { + if (Thread.currentThread().isInterrupted()) { + throw new OpenSearchTimeoutException( + new InterruptedException( + "Query planning interrupted in rule: " + getClass().getSimpleName())); + } + + onMatchImpl(call); + } + + /** + * Implementation-specific match handler. Subclasses must implement this method instead of + * overriding {@link #onMatch(RelOptRuleCall)}. + * + * <p>
    This method is called after an automatic interruption check. If the thread has been + * interrupted (due to a timeout), this method will not be called. + * + * @param call the rule call context + */ + protected abstract void onMatchImpl(RelOptRuleCall call); +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java index ce99431fa8b..e9f7b725852 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/LimitIndexScanRule.java @@ -7,7 +7,6 @@ import java.util.Objects; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rex.RexLiteral; @@ -22,14 +21,14 @@ * down to {@link CalciteLogicalIndexScan} */ @Value.Enclosing -public class LimitIndexScanRule extends RelRule { +public class LimitIndexScanRule extends InterruptibleRelRule { protected LimitIndexScanRule(Config config) { super(config); } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final LogicalSort sort = call.rel(0); final CalciteLogicalIndexScan scan = call.rel(1); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java index 629869be547..70f467ee8d0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/ProjectIndexScanRule.java @@ -12,7 +12,6 @@ import java.util.Objects; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptTable; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; @@ -27,7 +26,7 @@ /** Planner rule that push a {@link LogicalProject} down to {@link CalciteLogicalIndexScan} */ @Value.Enclosing -public class ProjectIndexScanRule extends RelRule { +public class ProjectIndexScanRule extends InterruptibleRelRule { /** Creates a ProjectIndexScanRule. 
*/ protected ProjectIndexScanRule(Config config) { @@ -35,7 +34,7 @@ protected ProjectIndexScanRule(Config config) { } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { if (call.rels.length == 2) { // the ordinary variant final LogicalProject project = call.rel(0); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java index 2d065ff0c3f..4403cdf9975 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RareTopPushdownRule.java @@ -8,7 +8,6 @@ import java.util.List; import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rex.RexCall; @@ -24,14 +23,14 @@ import org.opensearch.sql.opensearch.storage.scan.context.RareTopDigest; @Value.Enclosing -public class RareTopPushdownRule extends RelRule { +public class RareTopPushdownRule extends InterruptibleRelRule { protected RareTopPushdownRule(Config config) { super(config); } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final LogicalFilter filter = call.rel(0); final LogicalProject project = call.rel(1); final CalciteLogicalIndexScan scan = call.rel(2); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java index 6ec968ebc6d..9c96ed0c103 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/RelevanceFunctionPushdownRule.java @@ -9,7 +9,6 @@ import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.logical.LogicalFilter; @@ -27,7 +26,8 @@ * relevance functions are always executed by OpenSearch for optimal performance and functionality. */ @Value.Enclosing -public class RelevanceFunctionPushdownRule extends RelRule { +public class RelevanceFunctionPushdownRule + extends InterruptibleRelRule { /** Creates an RelevanceFunctionPushdownRule. 
*/ protected RelevanceFunctionPushdownRule(Config config) { @@ -35,7 +35,7 @@ protected RelevanceFunctionPushdownRule(Config config) { } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { if (call.rels.length == 2) { final LogicalFilter filter = call.rel(0); final CalciteLogicalIndexScan scan = call.rel(1); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java index 62587a2d430..db96f4550fe 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortAggregateMeasureRule.java @@ -7,7 +7,6 @@ import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.logical.LogicalSort; import org.immutables.value.Value; @@ -17,14 +16,15 @@ import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @Value.Enclosing -public class SortAggregateMeasureRule extends RelRule { +public class SortAggregateMeasureRule + extends InterruptibleRelRule { protected SortAggregateMeasureRule(Config config) { super(config); } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final LogicalSort sort = call.rel(0); final CalciteLogicalIndexScan scan = call.rel(1); CalciteLogicalIndexScan newScan = scan.pushDownSortAggregateMeasure(sort); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java index 557eb3ce46e..aa2f8289a93 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortExprIndexScanRule.java @@ -12,7 +12,6 @@ import java.util.Optional; import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelFieldCollation.Direction; import org.apache.calcite.rel.core.Project; @@ -27,6 +26,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.immutables.value.Value; +import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; import org.opensearch.sql.opensearch.storage.scan.CalciteLogicalIndexScan; @@ -39,14 +39,14 @@ * the OpenSearch level for better performance. */ @Value.Enclosing -public class SortExprIndexScanRule extends RelRule { +public class SortExprIndexScanRule extends InterruptibleRelRule { protected SortExprIndexScanRule(SortExprIndexScanRule.Config config) { super(config); } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final LogicalSort sort = call.rel(0); final LogicalProject project = call.rel(1); final CalciteLogicalIndexScan scan = call.rel(2); @@ -232,7 +232,7 @@ private boolean isSupportedSortScriptType(SqlTypeName sqlTypeName) { /** Rule configuration. 
*/ @Value.Immutable - public interface Config extends RelRule.Config { + public interface Config extends OpenSearchRuleConfig { SortExprIndexScanRule.Config DEFAULT = ImmutableSortExprIndexScanRule.Config.builder() .build() diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java index 86a039cc145..44f98b315bd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortIndexScanRule.java @@ -7,7 +7,6 @@ import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.core.Sort; import org.immutables.value.Value; import org.opensearch.sql.calcite.plan.OpenSearchRuleConfig; @@ -15,14 +14,14 @@ import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; @Value.Enclosing -public class SortIndexScanRule extends RelRule { +public class SortIndexScanRule extends InterruptibleRelRule { protected SortIndexScanRule(Config config) { super(config); } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final Sort sort = call.rel(0); final AbstractCalciteIndexScan scan = call.rel(1); if (sort.getConvention() != scan.getConvention()) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java index fe0fae8e64a..a40ca3877bc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/SortProjectExprTransposeRule.java @@ -12,7 +12,6 @@ import java.util.Optional; import java.util.function.Predicate; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelCollations; @@ -37,14 +36,15 @@ * push down sort expression script into scan. 
*/ @Value.Enclosing -public class SortProjectExprTransposeRule extends RelRule { +public class SortProjectExprTransposeRule + extends InterruptibleRelRule { protected SortProjectExprTransposeRule(Config config) { super(config); } @Override - public void onMatch(RelOptRuleCall call) { + protected void onMatchImpl(RelOptRuleCall call) { final Sort sort = call.rel(0); final Project project = call.rel(1); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java index 9141c5a1837..bd8001f589d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java @@ -70,6 +70,13 @@ public class OpenSearchSettings extends Settings { Setting.Property.NodeScope, Setting.Property.Dynamic); + public static final Setting PPL_QUERY_TIMEOUT_SETTING = + Setting.positiveTimeSetting( + Key.PPL_QUERY_TIMEOUT.getKeyValue(), + TimeValue.timeValueSeconds(300), + Setting.Property.NodeScope, + Setting.Property.Dynamic); + public static final Setting PPL_SYNTAX_LEGACY_PREFERRED_SETTING = Setting.boolSetting( Key.PPL_SYNTAX_LEGACY_PREFERRED.getKeyValue(), @@ -364,6 +371,12 @@ public OpenSearchSettings(ClusterSettings clusterSettings) { Key.PPL_ENABLED, PPL_ENABLED_SETTING, new Updater(Key.PPL_ENABLED)); + register( + settingBuilder, + clusterSettings, + Key.PPL_QUERY_TIMEOUT, + PPL_QUERY_TIMEOUT_SETTING, + new Updater(Key.PPL_QUERY_TIMEOUT)); register( settingBuilder, clusterSettings, @@ -638,6 +651,7 @@ public static List> pluginSettings() { .add(SQL_SLOWLOG_SETTING) .add(SQL_CURSOR_KEEP_ALIVE_SETTING) .add(PPL_ENABLED_SETTING) + .add(PPL_QUERY_TIMEOUT_SETTING) .add(PPL_SYNTAX_LEGACY_PREFERRED_SETTING) .add(CALCITE_ENGINE_ENABLED_SETTING) .add(CALCITE_FALLBACK_ALLOWED_SETTING) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java index 6aa73ee5865..cf7ac3d04a9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java @@ -13,6 +13,7 @@ import lombok.EqualsAndHashCode; import lombok.ToString; import org.jetbrains.annotations.TestOnly; +import org.opensearch.OpenSearchTimeoutException; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.core.common.io.stream.BytesStreamInput; import org.opensearch.sql.data.model.ExprValue; @@ -69,6 +70,11 @@ public void open() { @Override public boolean hasNext() { + // Check for thread interruption to support query timeout + if (Thread.currentThread().isInterrupted()) { + throw new OpenSearchTimeoutException(new InterruptedException("Query execution interrupted")); + } + // For pagination and limit, we need to limit the return rows count to pageSize or limit size if (queryCount >= maxResponseSize) { return false; @@ -82,6 +88,11 @@ public boolean hasNext() { @Override public ExprValue next() { + // Check for thread interruption to support query timeout + if (Thread.currentThread().isInterrupted()) { + throw new OpenSearchTimeoutException(new InterruptedException("Query execution interrupted")); + } + queryCount++; return iterator.next(); } diff --git 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManagerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManagerTest.java index 651c176c866..1463cf48fff 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManagerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchQueryManagerTest.java @@ -16,14 +16,17 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.common.unit.TimeValue; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.response.ResponseListener; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.QueryId; import org.opensearch.sql.executor.QueryService; import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.executor.execution.AbstractPlan; import org.opensearch.sql.executor.execution.QueryPlan; +import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.client.node.NodeClient; @@ -44,7 +47,12 @@ class OpenSearchQueryManagerTest { public void submitQuery() { NodeClient nodeClient = mock(NodeClient.class); ThreadPool threadPool = mock(ThreadPool.class); + Settings settings = mock(Settings.class); + Scheduler.ScheduledCancellable mockScheduledTask = mock(Scheduler.ScheduledCancellable.class); + when(nodeClient.threadPool()).thenReturn(threadPool); + when(settings.getSettingValue(Settings.Key.PPL_QUERY_TIMEOUT)) + .thenReturn(TimeValue.timeValueSeconds(60)); AtomicBoolean isRun = new AtomicBoolean(false); AbstractPlan queryPlan = @@ -55,15 +63,16 @@ public void execute() { } }; + // Mock the schedule method to run tasks immediately and return a mock ScheduledCancellable doAnswer( invocation -> { Runnable task = invocation.getArgument(0); task.run(); - return null; + return mockScheduledTask; }) .when(threadPool) .schedule(any(), any(), any()); - new OpenSearchQueryManager(nodeClient).submit(queryPlan); + new OpenSearchQueryManager(nodeClient, settings).submit(queryPlan); assertTrue(isRun.get()); } diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java index 05076506ce9..8027301073f 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/config/OpenSearchPluginModule.java @@ -80,8 +80,8 @@ public PlanSerializer planSerializer(StorageEngine storageEngine) { @Provides @Singleton - public QueryManager queryManager(NodeClient nodeClient) { - return new OpenSearchQueryManager(nodeClient); + public QueryManager queryManager(NodeClient nodeClient, Settings settings) { + return new OpenSearchQueryManager(nodeClient, settings); } @Provides From 6bd6b50cb1888550239619537cc27cf235443ab3 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Tue, 2 Dec 2025 13:40:19 -0800 Subject: [PATCH 90/99] [BugFix] Fix Memory Exhaustion for Multiple Filtering Operations in PPL (#4841) * [BugFix] Fix Regex OOM when there are 10+ regex clauses Signed-off-by: Jialiang Liang * fix unit tests Signed-off-by: Jialiang Liang * fix tests Signed-off-by: Jialiang Liang * fix explain tests and corresponding commands Signed-off-by: Jialiang Liang * fix explain tests for 
testFilterPushDownExplain Signed-off-by: Jialiang Liang * peng - isolate the fix logic to its own visitor class Signed-off-by: Jialiang Liang * Directly apply Calcite CoreRules.FILTER_MERGE before VolcanoPlanner plan Co-authored-by: Peng Huo Signed-off-by: Jialiang Liang * fix the UTs Signed-off-by: Jialiang Liang * fix the ITs after rebase Signed-off-by: Jialiang Liang * fix clickbench IT and more ITs Signed-off-by: Jialiang Liang * address comments from peng Signed-off-by: Jialiang Liang * add yaml test Signed-off-by: Jialiang Liang --------- Signed-off-by: Jialiang Liang Co-authored-by: Peng Huo --- .../opensearch/sql/executor/QueryService.java | 19 ++ .../big5/composite_date_histogram_daily.yaml | 7 +- .../calcite/big5/composite_terms.yaml | 7 +- .../calcite/big5/composite_terms_keyword.yaml | 7 +- .../big5/date_histogram_minute_agg.yaml | 7 +- .../calcite/big5/keyword_in_range.yaml | 5 +- .../calcite/big5/multi_terms_keyword.yaml | 7 +- .../calcite/clickbench/q11.yaml | 5 +- .../calcite/clickbench/q12.yaml | 5 +- .../calcite/clickbench/q13.yaml | 5 +- .../calcite/clickbench/q14.yaml | 5 +- .../calcite/clickbench/q15.yaml | 5 +- .../calcite/clickbench/q22.yaml | 5 +- .../calcite/clickbench/q23.yaml | 5 +- .../calcite/clickbench/q28.yaml | 5 +- .../calcite/clickbench/q31.yaml | 7 +- .../calcite/clickbench/q32.yaml | 7 +- .../calcite/clickbench/q37.yaml | 5 +- .../calcite/clickbench/q38.yaml | 5 +- .../calcite/clickbench/q39.yaml | 7 +- .../calcite/clickbench/q41.yaml | 7 +- .../calcite/clickbench/q42.yaml | 7 +- .../calcite/clickbench/q43.yaml | 7 +- .../expectedOutput/calcite/clickbench/q8.yaml | 5 +- .../calcite/explain_filter_push.yaml | 6 +- ...plain_filter_push_compare_date_string.yaml | 7 +- ...plain_filter_push_compare_time_string.yaml | 7 +- ..._filter_push_compare_timestamp_string.yaml | 5 +- .../calcite/explain_filter_with_search.yaml | 7 +- .../explain_filter_push.yaml | 6 +- ...plain_filter_push_compare_date_string.yaml | 7 +- ...plain_filter_push_compare_time_string.yaml | 7 +- ..._filter_push_compare_timestamp_string.yaml | 5 +- .../rest-api-spec/test/issues/4842.yml | 171 ++++++++++++++++++ .../ppl/calcite/CalcitePPLAbstractTest.java | 13 ++ .../sql/ppl/calcite/CalcitePPLRegexTest.java | 11 +- .../ppl/calcite/CalcitePPLTrendlineTest.java | 9 +- 37 files changed, 291 insertions(+), 126 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index e4a6c5da21c..d23430e7c4a 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -16,11 +16,15 @@ import lombok.extern.log4j.Log4j2; import org.apache.calcite.jdbc.CalciteSchema; import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.FrameworkConfig; @@ -52,6 +56,9 @@ @AllArgsConstructor @Log4j2 public class 
QueryService { + private static final HepProgram FILTER_MERGE_PROGRAM = + new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build(); + private final Analyzer analyzer; private final ExecutionEngine executionEngine; private final Planner planner; @@ -100,6 +107,7 @@ public void executeWithCalcite( CalcitePlanContext.create( buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType); RelNode relNode = analyze(plan, context); + relNode = mergeAdjacentFilters(relNode); RelNode optimized = optimize(relNode, context); RelNode calcitePlan = convertToCalcitePlan(optimized); executionEngine.execute(calcitePlan, context, listener); @@ -145,6 +153,7 @@ public void explainWithCalcite( context.run( () -> { RelNode relNode = analyze(plan, context); + relNode = mergeAdjacentFilters(relNode); RelNode optimized = optimize(relNode, context); RelNode calcitePlan = convertToCalcitePlan(optimized); executionEngine.explain(calcitePlan, format, context, listener); @@ -259,6 +268,16 @@ public RelNode analyze(UnresolvedPlan plan, CalcitePlanContext context) { return getRelNodeVisitor().analyze(plan, context); } + /** + * Run Calcite FILTER_MERGE once so adjacent filters created during analysis can collapse before + * the rest of optimization. + */ + private RelNode mergeAdjacentFilters(RelNode relNode) { + HepPlanner planner = new HepPlanner(FILTER_MERGE_PROGRAM); + planner.setRoot(relNode); + return planner.findBestExp(); + } + /** Analyze {@link UnresolvedPlan}. */ public LogicalPlan analyze(UnresolvedPlan plan, QueryType queryType) { return analyzer.analyze(plan, new AnalysisContext(queryType)); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index 10023133a38..56dec15223d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -5,8 +5,7 @@ calcite: LogicalProject(count()=[$1], span(`@timestamp`,1d)=[$0]) LogicalAggregate(group=[{0}], count()=[COUNT()]) LogicalProject(span(`@timestamp`,1d)=[SPAN($17, 1, 'd')]) - LogicalFilter(condition=[IS NOT NULL($17)]) - LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) - CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml
index cc3af323ddf..6d3ef26ee3e 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1])
           LogicalAggregate(group=[{0, 1}], count()=[COUNT()])
             LogicalProject(process.name=[$7], cloud.region=[$14])
-              LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14))])
-                LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml
index 9e546a26dbf..2b48025c015 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(count()=[$3], process.name=[$0], cloud.region=[$1], aws.cloudwatch.log_stream=[$2])
           LogicalAggregate(group=[{0, 1, 2}], count()=[COUNT()])
             LogicalProject(process.name=[$7], cloud.region=[$14], aws.cloudwatch.log_stream=[$34])
-              LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))])
-                LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml
index 44b15522967..3d5639f94c0 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml
@@ -4,8 +4,7 @@ calcite:
       LogicalProject(count()=[$1], span(`@timestamp`,1m)=[$0])
         LogicalAggregate(group=[{0}], count()=[COUNT()])
          LogicalProject(span(`@timestamp`,1m)=[SPAN($17, 1, 'm')])
-            LogicalFilter(condition=[IS NOT NULL($17)])
-              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
-                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+            LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
+              CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml
index 85c08cf100c..e1f0873de61 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/keyword_in_range.yaml
@@ -3,8 +3,7 @@ calcite:
     LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
       LogicalProject(agent=[$0], process=[$6], log=[$8], message=[$11], tags=[$12], cloud=[$13], input=[$15], @timestamp=[$17], ecs=[$18], data_stream=[$20], meta=[$24], host=[$26], metrics=[$27], aws=[$30], event=[$35])
         LogicalSort(fetch=[10])
-          LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
-            LogicalFilter(condition=[query_string(MAP('query', 'process.name:kernel':VARCHAR))])
-              CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+          LogicalFilter(condition=[AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), >=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
+            CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, aws, event], FILTER->AND(query_string(MAP('query', 'process.name:kernel':VARCHAR)), SEARCH($7, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR)), LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"process.name:kernel","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","aws","event"],"excludes":[]}}, requestedTotalSize=10, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml
index 89708ca4d4b..5659aca68dd 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/multi_terms_keyword.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(count()=[$2], process.name=[$0], cloud.region=[$1])
           LogicalAggregate(group=[{0, 1}], count()=[COUNT()])
             LogicalProject(process.name=[$7], cloud.region=[$14])
-              LogicalFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($14))])
-                LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
+              LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-05 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-05 05:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->AND(SEARCH($2, Sarg[['2023-01-05 00:00:00':VARCHAR..'2023-01-05 05:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[count(), process.name, cloud.region], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-05T00:00:00.000Z","to":"2023-01-05T05:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"process.name|cloud.region":{"multi_terms":{"terms":[{"field":"process.name"},{"field":"cloud.region"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml
index f21f57a583e..0f137b73605 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q11.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(u=[$1], MobilePhoneModel=[$0])
           LogicalAggregate(group=[{0}], u=[COUNT(DISTINCT $1)])
             LogicalProject(MobilePhoneModel=[$31], UserID=[$84])
-              LogicalFilter(condition=[IS NOT NULL($31)])
-                LogicalFilter(condition=[<>($31, '')])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[<>($31, '')])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($31, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"MobilePhoneModel":{"terms":{"field":"MobilePhoneModel","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml
index 9164e61b3e8..6b47ac718b6 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q12.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(u=[$2], MobilePhone=[$0], MobilePhoneModel=[$1])
           LogicalAggregate(group=[{0, 1}], u=[COUNT(DISTINCT $2)])
             LogicalProject(MobilePhone=[$62], MobilePhoneModel=[$31], UserID=[$84])
-              LogicalFilter(condition=[AND(IS NOT NULL($62), IS NOT NULL($31))])
-                LogicalFilter(condition=[<>($31, '')])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(<>($31, ''), IS NOT NULL($62))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[MobilePhoneModel, MobilePhone, UserID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},u=COUNT(DISTINCT $2)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[u, MobilePhone, MobilePhoneModel], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"MobilePhoneModel","boost":1.0}}],"must_not":[{"term":{"MobilePhoneModel":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"MobilePhone","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["MobilePhoneModel","MobilePhone","UserID"],"excludes":[]},"aggregations":{"MobilePhone|MobilePhoneModel":{"multi_terms":{"terms":[{"field":"MobilePhone"},{"field":"MobilePhoneModel"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml
index 8c7797a4ad7..3c2c0f9dfb5 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q13.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(c=[$1], SearchPhrase=[$0])
           LogicalAggregate(group=[{0}], c=[COUNT()])
             LogicalProject(SearchPhrase=[$63])
-              LogicalFilter(condition=[IS NOT NULL($63)])
-                LogicalFilter(condition=[<>($63, '')])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[<>($63, '')])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($63, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml
index aa980934e37..c3678470be8 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q14.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(u=[$1], SearchPhrase=[$0])
           LogicalAggregate(group=[{0}], u=[COUNT(DISTINCT $1)])
             LogicalProject(SearchPhrase=[$63], UserID=[$84])
-              LogicalFilter(condition=[IS NOT NULL($63)])
-                LogicalFilter(condition=[<>($63, '')])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[<>($63, '')])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($63, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},u=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[u, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"u":"desc"},{"_key":"asc"}]},"aggregations":{"u":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml
index 3b0000ec80a..7218a23bc29 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q15.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(c=[$2], SearchEngineID=[$0], SearchPhrase=[$1])
           LogicalAggregate(group=[{0, 1}], c=[COUNT()])
             LogicalProject(SearchEngineID=[$65], SearchPhrase=[$63])
-              LogicalFilter(condition=[AND(IS NOT NULL($65), IS NOT NULL($63))])
-                LogicalFilter(condition=[<>($63, '')])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(<>($63, ''), IS NOT NULL($65))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, SearchEngineID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID"],"excludes":[]},"aggregations":{"SearchEngineID|SearchPhrase":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"SearchPhrase"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml
index 513568e063b..a4cc2248e1e 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q22.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(c=[$1], SearchPhrase=[$0])
           LogicalAggregate(group=[{0}], c=[COUNT()])
             LogicalProject(SearchPhrase=[$63])
-              LogicalFilter(condition=[IS NOT NULL($63)])
-                LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(LIKE($26, '%google%', '\'), <>($63, ''))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase], FILTER->AND(LIKE($0, '%google%', '\'), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"URL":{"wildcard":"*google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml
index d38438ad4e9..5ec734b13ea 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(c=[$1], dc(UserID)=[$2], SearchPhrase=[$0])
           LogicalAggregate(group=[{0}], c=[COUNT()], dc(UserID)=[COUNT(DISTINCT $1)])
             LogicalProject(SearchPhrase=[$63], UserID=[$84])
-              LogicalFilter(condition=[IS NOT NULL($63)])
-                LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml
index daa53e1c368..2bf72da9393 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml
@@ -6,9 +6,8 @@ calcite:
           LogicalProject(l=[$1], c=[$2], CounterID=[$0])
             LogicalAggregate(group=[{0}], l=[AVG($1)], c=[COUNT()])
               LogicalProject(CounterID=[$103], $f2=[CHAR_LENGTH($26)])
-                LogicalFilter(condition=[IS NOT NULL($103)])
-                  LogicalFilter(condition=[<>($26, '')])
-                    CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+                LogicalFilter(condition=[AND(<>($26, ''), IS NOT NULL($103))])
+                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     EnumerableLimit(fetch=[10000])
       EnumerableLimit(fetch=[25])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml
index 0f9c18436cd..b6a513d31ee 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q31.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(c=[$2], sum(IsRefresh)=[$3], avg(ResolutionWidth)=[$4], SearchEngineID=[$0], ClientIP=[$1])
           LogicalAggregate(group=[{0, 1}], c=[COUNT()], sum(IsRefresh)=[SUM($2)], avg(ResolutionWidth)=[AVG($3)])
             LogicalProject(SearchEngineID=[$65], ClientIP=[$76], IsRefresh=[$72], ResolutionWidth=[$80])
-              LogicalFilter(condition=[AND(IS NOT NULL($65), IS NOT NULL($76))])
-                LogicalFilter(condition=[<>($63, '')])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(<>($63, ''), IS NOT NULL($65), IS NOT NULL($76))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml
index 6016bd287c6..e20758eed71 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q32.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(c=[$2], sum(IsRefresh)=[$3], avg(ResolutionWidth)=[$4], WatchID=[$0], ClientIP=[$1])
           LogicalAggregate(group=[{0, 1}], c=[COUNT()], sum(IsRefresh)=[SUM($2)], avg(ResolutionWidth)=[AVG($3)])
             LogicalProject(WatchID=[$41], ClientIP=[$76], IsRefresh=[$72], ResolutionWidth=[$80])
-              LogicalFilter(condition=[AND(IS NOT NULL($41), IS NOT NULL($76))])
-                LogicalFilter(condition=[<>($63, '')])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(<>($63, ''), IS NOT NULL($41), IS NOT NULL($76))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER-><>($1, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml
index ffe16fc8444..cea77806c98 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(PageViews=[$1], URL=[$0])
           LogicalAggregate(group=[{0}], PageViews=[COUNT()])
             LogicalProject(URL=[$26])
-              LogicalFilter(condition=[IS NOT NULL($26)])
-                LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($26, ''))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($2, 0), =($3, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml
index 97b0fd3db2f..711cf5bc29e 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q38.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(PageViews=[$1], Title=[$0])
           LogicalAggregate(group=[{0}], PageViews=[COUNT()])
             LogicalProject(Title=[$97])
-              LogicalFilter(condition=[IS NOT NULL($97)])
-                LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($42, 0), =($72, 0), <>($97, ''))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, DontCountHits, IsRefresh, Title, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($1, 0), =($2, 0), <>($3, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, Title], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"Title","boost":1.0}}],"must_not":[{"term":{"Title":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","DontCountHits","IsRefresh","Title","CounterID"],"excludes":[]},"aggregations":{"Title":{"terms":{"field":"Title","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml
index 8b26cbbb03b..08ea4e0a45b 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q39.yaml
@@ -5,10 +5,9 @@ calcite:
         LogicalProject(PageViews=[$1], URL=[$0])
           LogicalAggregate(group=[{0}], PageViews=[COUNT()])
             LogicalProject(URL=[$26])
-              LogicalFilter(condition=[IS NOT NULL($26)])
-                LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), <>($49, 0), =($35, 0))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), <>($49, 0), =($35, 0), IS NOT NULL($26))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     EnumerableLimit(fetch=[10000])
       EnumerableLimit(offset=[1000], fetch=[10])
-        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, IsDownload, IsLink, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","IsDownload","IsLink","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URL, IsDownload, IsLink, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), <>($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"IsLink","boost":1.0}}],"must_not":[{"term":{"IsLink":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"IsDownload":{"value":0,"boost":1.0}}},{"exists":{"field":"URL","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URL","IsDownload","IsLink","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URL":{"terms":{"field":"URL","size":1010,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml
index 8c8a2359f51..cb31cbd45db 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q41.yaml
@@ -5,10 +5,9 @@ calcite:
         LogicalProject(PageViews=[$2], URLHash=[$0], EventDate=[$1])
           LogicalAggregate(group=[{0, 1}], PageViews=[COUNT()])
            LogicalProject(URLHash=[$25], EventDate=[$0])
-              LogicalFilter(condition=[AND(IS NOT NULL($25), IS NOT NULL($0))])
-                LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), SEARCH($12, Sarg[-1, 6]), =($11, 3594120000172545465))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), SEARCH($12, Sarg[-1, 6]), =($11, 3594120000172545465), IS NOT NULL($25))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     EnumerableLimit(fetch=[10000])
       EnumerableLimit(offset=[100], fetch=[10])
-        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, RefererHash, TraficSourceID, URLHash, IsRefresh, CounterID], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), SEARCH($2, Sarg[-1, 6]), =($1, 3594120000172545465), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, URLHash, EventDate]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"terms":{"TraficSourceID":[-1.0,6.0],"boost":1.0}},{"term":{"RefererHash":{"value":3594120000172545465,"boost":1.0}}},{"exists":{"field":"URLHash","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","RefererHash","TraficSourceID","URLHash","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"URLHash|EventDate":{"multi_terms":{"terms":[{"field":"URLHash"},{"field":"EventDate","value_type":"long"}],"size":110,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml
index 2f5e72fc1a0..6e9ced0f691 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q42.yaml
@@ -5,10 +5,9 @@ calcite:
         LogicalProject(PageViews=[$2], WindowClientWidth=[$0], WindowClientHeight=[$1])
           LogicalAggregate(group=[{0, 1}], PageViews=[COUNT()])
             LogicalProject(WindowClientWidth=[$104], WindowClientHeight=[$57])
-              LogicalFilter(condition=[AND(IS NOT NULL($104), IS NOT NULL($57))])
-                LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), =($42, 0), =($25, 2868770270353813622))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-31 00:00:00':VARCHAR)), =($72, 0), =($42, 0), =($25, 2868770270353813622), IS NOT NULL($104), IS NOT NULL($57))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     EnumerableLimit(fetch=[10000])
      EnumerableLimit(offset=[10000], fetch=[10])
-        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, URLHash, DontCountHits, WindowClientHeight, IsRefresh, CounterID, WindowClientWidth], FILTER->AND(=($5, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-31 00:00:00':VARCHAR]]:VARCHAR), =($4, 0), =($2, 0), =($1, 2868770270353813622), IS NOT NULL($6), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},PageViews=COUNT()), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[PageViews, WindowClientWidth, WindowClientHeight]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"URLHash":{"value":2868770270353813622,"boost":1.0}}},{"exists":{"field":"WindowClientWidth","boost":1.0}},{"exists":{"field":"WindowClientHeight","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","URLHash","DontCountHits","WindowClientHeight","IsRefresh","CounterID","WindowClientWidth"],"excludes":[]},"aggregations":{"WindowClientWidth|WindowClientHeight":{"multi_terms":{"terms":[{"field":"WindowClientWidth"},{"field":"WindowClientHeight"}],"size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
index 3bba5e1ed82..820898e401d 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
@@ -5,11 +5,10 @@ calcite:
         LogicalProject(PageViews=[$1], M=[$0])
           LogicalAggregate(group=[{0}], PageViews=[COUNT()])
             LogicalProject(M=[SPAN($17, 1, 'm')])
-              LogicalFilter(condition=[IS NOT NULL($17)])
-                LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-15 00:00:00':VARCHAR)), =($72, 0), =($42, 0))])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00':VARCHAR)), <=($0, TIMESTAMP('2013-07-15 00:00:00':VARCHAR)), =($72, 0), =($42, 0), IS NOT NULL($17))])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     EnumerableCalc(expr#0..1=[{inputs}], PageViews=[$t1], M=[$t0])
       EnumerableLimit(fetch=[10000])
        EnumerableLimit(offset=[1000], fetch=[10])
-          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"exists":{"field":"EventTime","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml
index 8ebc6067a0a..343aade48c4 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q8.yaml
@@ -5,8 +5,7 @@ calcite:
         LogicalProject(count()=[$1], AdvEngineID=[$0])
           LogicalAggregate(group=[{0}], count()=[COUNT()])
             LogicalProject(AdvEngineID=[$19])
-              LogicalFilter(condition=[IS NOT NULL($19)])
-                LogicalFilter(condition=[<>($19, 0)])
-                  CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
+              LogicalFilter(condition=[<>($19, 0)])
+                CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>($19, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[count(), AdvEngineID], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"AdvEngineID":{"terms":{"field":"AdvEngineID","size":10000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml
index 11bf9baa46c..eb020cf0f92 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push.yaml
@@ -2,9 +2,7 @@ calcite:
   logical: |
     LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
       LogicalProject(age=[$8])
-        LogicalFilter(condition=[>($3, 10000)])
-          LogicalFilter(condition=[<($8, 40)])
-            LogicalFilter(condition=[>($8, 30)])
-              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+        LogicalFilter(condition=[AND(SEARCH($8, Sarg[(30..40)]), >($3, 10000))])
+          CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], FILTER->AND(SEARCH($1, Sarg[(30..40)]), >($0, 10000)), PROJECT->[age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"range":{"age":{"from":30.0,"to":40.0,"include_lower":false,"include_upper":false,"boost":1.0}}},{"range":{"balance":{"from":10000,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml
index 55951816ff7..7f604d806ee 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_date_string.yaml
@@ -1,9 +1,8 @@
 calcite:
   logical: |
     LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
-      LogicalFilter(condition=[<($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR))])
-        LogicalFilter(condition=[>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR))])
-          LogicalProject(yyyy-MM-dd=[$83])
-            CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]])
+      LogicalFilter(condition=[AND(>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR)), <($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR)))])
+        LogicalProject(yyyy-MM-dd=[$83])
+          CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[yyyy-MM-dd], FILTER->SEARCH($0, Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"yyyy-MM-dd":{"from":"2016-12-08","to":"2018-11-09","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["yyyy-MM-dd"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml
index faf6a3764c5..475bc85ecb2 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_time_string.yaml
@@ -1,9 +1,8 @@
 calcite:
   logical: |
     LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
-      LogicalFilter(condition=[<($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR))])
-        LogicalFilter(condition=[>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR))])
-          LogicalProject(custom_time=[$49])
-            CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]])
+      LogicalFilter(condition=[AND(>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR)), <($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR)))])
+        LogicalProject(custom_time=[$49])
+          CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]], PushDownContext=[[PROJECT->[custom_time], FILTER->SEARCH($0, Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"custom_time":{"from":"12:00:00.123456789","to":"19:00:00.123456789","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["custom_time"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml
index e0a3fc8a7d3..954fab09d05 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml
@@ -2,8 +2,7 @@ calcite:
   logical: |
     LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
       LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])
-        LogicalFilter(condition=[<($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR))])
-          LogicalFilter(condition=[>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR))])
-            CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+        LogicalFilter(condition=[AND(>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))])
+          CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
   physical: |
     CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml
index bd8114a7989..dd1f6444d09 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml
@@ -4,8 +4,7 @@ calcite:
       LogicalProject(count()=[$1], span(birthdate,1d)=[$0])
         LogicalAggregate(group=[{0}], count()=[COUNT()])
           LogicalProject(span(birthdate,1d)=[SPAN($3, 1, 'd')])
-            LogicalFilter(condition=[IS NOT NULL($3)])
-              LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
-                CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+            LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
+              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml index ff9e2ed0ec1..d1f0cead8f6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push.yaml @@ -2,10 +2,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(age=[$8]) - LogicalFilter(condition=[>($3, 10000)]) - LogicalFilter(condition=[<($8, 40)]) - LogicalFilter(condition=[>($8, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalFilter(condition=[AND(SEARCH($8, Sarg[(30..40)]), >($3, 10000))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[Sarg[(30..40)]], expr#18=[SEARCH($t8, $t17)], expr#19=[10000], expr#20=[>($t3, $t19)], expr#21=[AND($t18, $t20)], age=[$t8], $condition=[$t21]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml index a8f52a8ac7e..f8fcc3a23a7 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_date_string.yaml @@ -1,10 +1,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalFilter(condition=[<($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR))]) - LogicalFilter(condition=[>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR))]) - LogicalProject(yyyy-MM-dd=[$83]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) + LogicalFilter(condition=[AND(>($0, DATE('2016-12-08 00:00:00.123456789':VARCHAR)), <($0, DATE('2018-11-09 00:00:00.000000000':VARCHAR)))]) + LogicalProject(yyyy-MM-dd=[$83]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('2016-12-08':VARCHAR..'2018-11-09':VARCHAR)]:VARCHAR], expr#96=[SEARCH($t83, $t95)], yyyy-MM-dd=[$t83], $condition=[$t96]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml index 72c738eaed0..4634cfaaa47 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_time_string.yaml @@ -1,10 +1,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalFilter(condition=[<($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR))]) - LogicalFilter(condition=[>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR))]) - LogicalProject(custom_time=[$49]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) + LogicalFilter(condition=[AND(>($0, TIME('2016-12-08 12:00:00.123456789':VARCHAR)), <($0, TIME('2018-11-09 19:00:00.123456789':VARCHAR)))]) + LogicalProject(custom_time=[$49]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_date_formats]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..94=[{inputs}], expr#95=[Sarg[('12:00:00.123456789':VARCHAR..'19:00:00.123456789':VARCHAR)]:VARCHAR], expr#96=[SEARCH($t49, $t95)], custom_time=[$t49], $condition=[$t96]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml index 424444f8dc4..20f0cbf4238 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_filter_push_compare_timestamp_string.yaml @@ -2,9 +2,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12]) - LogicalFilter(condition=[<($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR))]) - LogicalFilter(condition=[>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR))]) - CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalFilter(condition=[AND(>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..18=[{inputs}], expr#19=[Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR], expr#20=[SEARCH($t3, $t19)], proj#0..12=[{exprs}], $condition=[$t20]) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml new file mode 100644 index 00000000000..3a0425b4a3f --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4842.yml @@ -0,0 +1,171 @@ +setup: + - do: + indices.create: + index: test_filter_merge + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + name: + type: keyword + age: + type: integer + email: + type: keyword + status: + type: keyword + score: + type: double + city: + type: keyword + department: + type: keyword + active: + type: boolean + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Filter merge with multiple consecutive where clauses": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: 
test_filter_merge + refresh: true + body: + - '{"index": {}}' + - '{"name": "Alice", "age": 30, "email": "alice@example.com", "status": "active", "score": 95.5, "city": "Seattle", "department": "Engineering", "active": true}' + - '{"index": {}}' + - '{"name": "Bob", "age": 25, "email": "bob@example.com", "status": "active", "score": 88.0, "city": "Portland", "department": "Sales", "active": true}' + - '{"index": {}}' + - '{"name": "Charlie", "age": 35, "email": "charlie@example.com", "status": "inactive", "score": 72.5, "city": "Seattle", "department": "Engineering", "active": false}' + - '{"index": {}}' + - '{"name": "Diana", "age": 28, "email": "diana@example.com", "status": "active", "score": 91.0, "city": "Seattle", "department": "Marketing", "active": true}' + - '{"index": {}}' + - '{"name": "Eve", "age": 32, "email": "eve@example.com", "status": "active", "score": 85.5, "city": "Portland", "department": "Engineering", "active": true}' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source=test_filter_merge + | where name != "" + | where email != "" + | where status = "active" + | where age > 25 + | where age < 40 + | where score > 80 + | where score < 100 + | where city = "Seattle" + | where department = "Engineering" + | where active = true + | fields name, age, email, score + + - match: {"total": 1} + - match: {"datarows": [["Alice", 30, "alice@example.com", 95.5]]} + +--- +"Filter merge with IS NOT NULL checks": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_filter_merge + refresh: true + body: + - '{"index": {}}' + - '{"name": "Frank", "age": 40, "email": "frank@example.com", "status": "active", "score": 78.0, "city": "Boston", "department": "Sales", "active": true}' + - '{"index": {}}' + - '{"name": "Grace", "age": 35, "email": "", "status": "active", "score": 92.0, "city": "Boston", "department": "Sales", "active": true}' + - '{"index": {}}' + - '{"name": "", "age": 29, "email": "helen@example.com", "status": "active", "score": 80.0, "city": "Boston", "department": "Sales", "active": true}' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. 
If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source=test_filter_merge + | where name != "" + | where email != "" + | where status = "active" + | where isnotnull(score) + | where age > 30 + | where age < 50 + | where score > 70 + | where city = "Boston" + | where department = "Sales" + | where active = true + | fields name, email, status, score + + - match: {"total": 1} + - match: {"datarows": [["Frank", "frank@example.com", "active", 78.0]]} + +--- +"Filter merge with range and equality checks": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test_filter_merge + refresh: true + body: + - '{"index": {}}' + - '{"name": "Ivan", "age": 27, "email": "ivan@example.com", "status": "active", "score": 85.0, "city": "Seattle", "department": "HR", "active": true}' + - '{"index": {}}' + - '{"name": "Julia", "age": 33, "email": "julia@example.com", "status": "active", "score": 90.0, "city": "Portland", "department": "HR", "active": true}' + - '{"index": {}}' + - '{"name": "Kevin", "age": 45, "email": "kevin@example.com", "status": "active", "score": 88.0, "city": "Seattle", "department": "HR", "active": false}' + - '{"index": {}}' + - '{"name": "Laura", "age": 26, "email": "laura@example.com", "status": "inactive", "score": 75.0, "city": "Seattle", "department": "HR", "active": true}' + + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source=test_filter_merge + | where status = "active" + | where age >= 25 + | where age <= 35 + | where score >= 85 + | where score <= 95 + | where city = "Seattle" + | where name != "" + | where email != "" + | where department = "HR" + | where active = true + | stats count() by city + + - match: {"total": 1} + - match: {"datarows": [[1, "Seattle"]]} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java index 9dd01b30df5..ab07cd9b5c1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java @@ -22,9 +22,13 @@ import lombok.Getter; import org.apache.calcite.plan.Contexts; import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.rel2sql.RelToSqlConverter; import org.apache.calcite.rel.rel2sql.SqlImplementor; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParser; @@ -101,10 +105,19 @@ public RelNode getRelNode(String ppl) { Query query = (Query) plan(pplParser, ppl); planTransformer.analyze(query.getPlan(), context); RelNode root = context.relBuilder.build(); + root = mergeAdjacentFilters(root); System.out.println(root.explain()); return root; } + 
private RelNode mergeAdjacentFilters(RelNode relNode) { + HepProgram program = + new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build(); + HepPlanner planner = new HepPlanner(program); + planner.setRoot(relNode); + return planner.findBestExp(); + } + private Node plan(PPLSyntaxParser parser, String query) { final AstStatementBuilder builder = new AstStatementBuilder( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java index cfc0722bcfb..7e9d8a5c7bb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRegexTest.java @@ -38,19 +38,18 @@ public void testRegexBasic() { public void testRegexChainedFilters() { String ppl = "source=EMP | regex ENAME='A.*' | regex JOB='.*CLERK' | fields ENAME, JOB"; RelNode root = getRelNode(ppl); + // Filter accumulation combines multiple regex conditions into a single Filter with AND String expectedLogical = "LogicalProject(ENAME=[$1], JOB=[$2])\n" - + " LogicalFilter(condition=[REGEXP_CONTAINS($2, '.*CLERK':VARCHAR)])\n" - + " LogicalFilter(condition=[REGEXP_CONTAINS($1, 'A.*':VARCHAR)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + + " LogicalFilter(condition=[AND(REGEXP_CONTAINS($1, 'A.*':VARCHAR)," + + " REGEXP_CONTAINS($2, '.*CLERK':VARCHAR))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, `JOB`\n" - + "FROM (SELECT *\n" + "FROM `scott`.`EMP`\n" - + "WHERE REGEXP_CONTAINS(`ENAME`, 'A.*')) `t`\n" - + "WHERE REGEXP_CONTAINS(`JOB`, '.*CLERK')"; + + "WHERE REGEXP_CONTAINS(`ENAME`, 'A.*') AND REGEXP_CONTAINS(`JOB`, '.*CLERK')"; verifyPPLToSparkSQL(root, expectedSparkSql); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java index b036a4b5906..3c23af4b7a6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java @@ -76,9 +76,8 @@ public void testTrendlineMultipleFields() { + " DEPTNO_trendline=[CASE(>(COUNT() OVER (ROWS 1 PRECEDING), 1), /(SUM($7) OVER (ROWS" + " 1 PRECEDING), CAST(COUNT($7) OVER (ROWS 1 PRECEDING)):DOUBLE NOT NULL)," + " null:NULL)])\n" - + " LogicalFilter(condition=[IS NOT NULL($7)])\n" - + " LogicalFilter(condition=[IS NOT NULL($5)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + + " LogicalFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($7))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = @@ -89,10 +88,8 @@ public void testTrendlineMultipleFields() { + " BETWEEN 1 PRECEDING AND CURRENT ROW)) > 1 THEN (SUM(`DEPTNO`) OVER (ROWS BETWEEN 1" + " PRECEDING AND CURRENT ROW)) / CAST(COUNT(`DEPTNO`) OVER (ROWS BETWEEN 1 PRECEDING" + " AND CURRENT ROW) AS DOUBLE) ELSE NULL END `DEPTNO_trendline`\n" - + "FROM (SELECT *\n" + "FROM `scott`.`EMP`\n" - + "WHERE `SAL` IS NOT NULL) `t`\n" - + "WHERE `DEPTNO` IS NOT NULL"; + + "WHERE `SAL` IS NOT NULL AND `DEPTNO` IS NOT NULL"; verifyPPLToSparkSQL(root, expectedSparkSql); } } From 7ace2061d65e4c34b4a992f856c318935c70946e Mon Sep 17 00:00:00 2001 From: Tomoyuki MORITA Date: Thu, 4 Dec 2025 09:29:29 -0800 Subject: [PATCH 91/99] Adjust CodeRabbit review config (#4901) 
Signed-off-by: Tomoyuki Morita --- .coderabbit.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 11653b509a3..1794e121fd7 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -10,14 +10,14 @@ reviews: profile: "chill" request_changes_workflow: false high_level_summary: true - high_level_summary_placeholder: "@coderabbitai summary" + high_level_summary_in_walkthrough: true poem: false # Keep reviews professional and concise review_status: true - collapse_walkthrough: false + collapse_walkthrough: true auto_review: - enabled: false # Disabled auto-review until it becomes stable - auto_incremental_review: false + enabled: true + auto_incremental_review: true drafts: false # Don't review draft PRs ignore_title_keywords: - "WIP" @@ -66,7 +66,8 @@ reviews: - Test compatibility with Calcite version constraints chat: - auto_reply: true + auto_reply: false # require explicit tagging + art: false # disable ASCII / Emoji art # Knowledge base configuration knowledge_base: From 58025be52e9d64846e1c0878e6da73551b46e709 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Thu, 4 Dec 2025 11:31:00 -0800 Subject: [PATCH 92/99] Remove access controller step in Calcite script (#4900) Signed-off-by: Simeon Widdis --- .../opensearch/storage/script/core/CalciteScript.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java index ae79f18fa45..87428190d27 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/CalciteScript.java @@ -5,8 +5,6 @@ package org.opensearch.sql.opensearch.storage.script.core; -import java.security.AccessController; -import java.security.PrivilegedAction; import java.util.Map; import lombok.EqualsAndHashCode; import org.apache.calcite.DataContext; @@ -44,10 +42,7 @@ public Object[] execute( Map> docProvider, SourceLookup sourceLookup, Map parametersToIndex) { - return AccessController.doPrivileged( - (PrivilegedAction) - () -> - function.apply( - new ScriptDataContext(docProvider, sourceLookup, params, parametersToIndex))); + return function.apply( + new ScriptDataContext(docProvider, sourceLookup, params, parametersToIndex)); } } From e9700d5867aadb47b41f9230595b6dae8b286973 Mon Sep 17 00:00:00 2001 From: Kai Huang <105710027+ahkcs@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:12:19 -0800 Subject: [PATCH 93/99] Error handling for dot-containing field names (#4907) --- docs/user/ppl/limitations/limitations.rst | 24 +++ .../rest-api-spec/test/issues/4896.yml | 204 ++++++++++++++++++ .../value/OpenSearchExprValueFactory.java | 55 ++++- .../value/OpenSearchExprValueFactoryTest.java | 108 ++++++++++ 4 files changed, 384 insertions(+), 7 deletions(-) create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml diff --git a/docs/user/ppl/limitations/limitations.rst b/docs/user/ppl/limitations/limitations.rst index f9c620ff18b..41d3a007d23 100644 --- a/docs/user/ppl/limitations/limitations.rst +++ b/docs/user/ppl/limitations/limitations.rst @@ -106,3 +106,27 @@ For the following functionalities, the query will be forwarded to the V2 query e * ``show datasources`` and command * Commands with ``fetch_size`` parameter + +Malformed Field Names in 
Object Fields +====================================== + +OpenSearch normally rejects field names containing problematic dot patterns (such as ``.``, ``..``, ``.a``, ``a.``, or ``a..b``). However, when an object field has ``enabled: false``, OpenSearch bypasses field name validation and allows storing documents with any field names. + +If a document contains malformed field names inside an object field, PPL ignores those malformed field names. Other valid fields in the document are returned normally. + +**Example of affected data:** + +.. code-block:: json + + { + "log": { + ".": "value1", + ".a": "value2", + "a.": "value3", + "a..b": "value4" + } + } + +When ``log`` is an object field with ``enabled: false``, subfields with malformed names are ignored. + +**Recommendation:** Avoid using field names that contain leading dots, trailing dots, consecutive dots, or consist only of dots. This aligns with OpenSearch's default field naming requirements. diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml new file mode 100644 index 00000000000..724957e7af6 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4896.yml @@ -0,0 +1,204 @@ +# Issue: https://github.com/opensearch-project/sql/issues/4896 +# ArrayIndexOutOfBoundsException when querying index with malformed field names in disabled object +# +# Root cause: When a document has a field name with problematic dot patterns (e.g., ".", "..", ".a", +# "a.", "a..b"), the JsonPath parsing logic fails because String.split("\\.") produces empty strings. +# +# This can happen when an index has a disabled object field (enabled: false), which allows storing +# documents without validating inner field names. Normal OpenSearch indices reject such field names. +# +# Fix: The query engine now detects malformed field names and returns null for those fields, +# allowing the rest of the document to be processed normally. 
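+ #
+ # For reference, the String.split behavior behind the crash can be reproduced with a
+ # standalone snippet (illustrative sketch only, not shipped with this patch; the class
+ # name SplitDemo is hypothetical):
+ #
+ #   public class SplitDemo {
+ #     public static void main(String[] args) {
+ #       // "." splits to a zero-length array under the default limit, so any parts[0]
+ #       // access throws ArrayIndexOutOfBoundsException -- the crash reported here.
+ #       System.out.println(".".split("\\.").length);          // 0
+ #       // The default limit also drops trailing empty strings, so "a." looks valid...
+ #       System.out.println("a.".split("\\.").length);         // 1
+ #       // ...while an explicit -1 limit preserves them, which is why the fix calls
+ #       // split("\\.", -1) before checking for empty parts.
+ #       System.out.println("a.".split("\\.", -1).length);     // 2 (last part is empty)
+ #       // Leading and consecutive dots produce empty parts under either limit.
+ #       System.out.println(".a".split("\\.")[0].isEmpty());   // true
+ #       System.out.println("a..b".split("\\.")[1].isEmpty()); // true
+ #     }
+ #   }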
+ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + # Create index with disabled object field to allow unusual field names + - do: + indices.create: + index: test_malformed_fields_4896 + body: + mappings: + properties: + log: + type: object + enabled: false + "@timestamp": + type: date + message: + type: text + status: + type: keyword + + # Use bulk indexing to create all test documents + - do: + bulk: + index: test_malformed_fields_4896 + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"@timestamp": "2025-11-26T17:10:00.000Z", "message": "single dot test", "status": "ok", "log": {".": "dot only value", "valid": "normal value"}}' + - '{"index": {"_id": "2"}}' + - '{"@timestamp": "2025-11-26T17:11:00.000Z", "message": "double dot test", "status": "ok", "log": {"..": "double dot value", "valid": "normal value"}}' + - '{"index": {"_id": "3"}}' + - '{"@timestamp": "2025-11-26T17:12:00.000Z", "message": "triple dot test", "status": "ok", "log": {"...": "triple dot value", "valid": "normal value"}}' + - '{"index": {"_id": "4"}}' + - '{"@timestamp": "2025-11-26T17:13:00.000Z", "message": "leading dot test", "status": "ok", "log": {".a": "leading dot value", "valid": "normal value"}}' + - '{"index": {"_id": "5"}}' + - '{"@timestamp": "2025-11-26T17:14:00.000Z", "message": "trailing dot test", "status": "ok", "log": {"a.": "trailing dot value", "valid": "normal value"}}' + - '{"index": {"_id": "6"}}' + - '{"@timestamp": "2025-11-26T17:15:00.000Z", "message": "consecutive dots test", "status": "ok", "log": {"a..b": "consecutive dots value", "valid": "normal value"}}' + - '{"index": {"_id": "7"}}' + - '{"@timestamp": "2025-11-26T17:16:00.000Z", "message": "multiple malformed test", "status": "ok", "log": {".": "dot1", "..": "dot2", ".leading": "dot3", "trailing.": "dot4", "mid..dle": "dot5", "valid1": "normal1", "valid2": "normal2"}}' + - '{"index": {"_id": "8"}}' + - '{"@timestamp": "2025-11-26T17:17:00.000Z", "message": "valid nested test", "status": "ok", "log": {"nested.field": "nested value"}}' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + - do: + indices.delete: + index: test_malformed_fields_4896 + +--- +"Query all documents with unusual field names succeeds": + - skip: + features: + - headers + # Before the fix: ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0 + # After the fix: Query succeeds for all documents + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | fields @timestamp, message, status | sort @timestamp + - match: { "total": 8 } + - match: { "datarows.0.0": "2025-11-26 17:10:00" } + - match: { "datarows.0.1": "single dot test" } + - match: { "datarows.7.0": "2025-11-26 17:17:00" } + - match: { "datarows.7.1": "valid nested test" } + +--- +"Single dot field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "single dot test" | fields log + - match: { "total": 1 } + # The "." 
field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Multiple dots field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "double dot test" | fields log + - match: { "total": 1 } + # The ".." field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "triple dot test" | fields log + - match: { "total": 1 } + # The "..." field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Leading dot field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "leading dot test" | fields log + - match: { "total": 1 } + # The ".a" field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Trailing dot field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "trailing dot test" | fields log + - match: { "total": 1 } + # The "a." field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Consecutive dots field name returns null for malformed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "consecutive dots test" | fields log + - match: { "total": 1 } + # The "a..b" field returns null, so log contains only the valid field + - match: { "datarows.0.0": {"valid": "normal value"} } + +--- +"Multiple malformed fields coexist with valid fields": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "multiple malformed test" | fields log + - match: { "total": 1 } + # All malformed fields return null, only valid fields remain + - match: { "datarows.0.0": {"valid1": "normal1", "valid2": "normal2"} } + +--- +"Valid nested field still works (issue #3477 compatibility)": + - skip: + features: + - headers + # This tests that the fix for #4896 doesn't break the flattening behavior from #3477 + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_malformed_fields_4896 | where message = "valid nested test" | fields log + - match: { "total": 1 } + # Valid nested field "nested.field" is properly expanded to nested structure + - match: { "datarows.0.0": {"nested": {"field": "nested value"}} } + diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index fc5610d73f0..ca2559ab5c4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -72,6 +72,7 @@ /** 
Construct ExprValue from OpenSearch response. */ public class OpenSearchExprValueFactory { + /** The Mapping of Field and ExprType. */ private final Map typeMapping; @@ -373,18 +374,58 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr content .map() .forEachRemaining( - entry -> + entry -> { + String fieldKey = entry.getKey(); + String fullFieldPath = makeField(prefix, fieldKey); + // Check for malformed field names before creating JsonPath. + // See isFieldNameMalformed() for details on what constitutes a malformed field name. + if (isFieldNameMalformed(fieldKey)) { + result.tupleValue().put(fieldKey, ExprNullValue.of()); + } else { populateValueRecursive( result, - new JsonPath(entry.getKey()), - parse( - entry.getValue(), - makeField(prefix, entry.getKey()), - type(makeField(prefix, entry.getKey())), - supportArrays))); + new JsonPath(fieldKey), + parse(entry.getValue(), fullFieldPath, type(fullFieldPath), supportArrays)); + } + }); return result; } + /** + * Check if a field name is malformed and cannot be processed by JsonPath. + * + *

<p>A field name is malformed if it contains dot patterns that would cause String.split("\\.") + * to produce empty strings. This includes: + * + * <ul> + *   <li>Dot-only field names: ".", "..", "..." + *   <li>Leading dots: ".a", "..a" + *   <li>Trailing dots: "a.", "a.." + *   <li>Consecutive dots: "a..b", "a...b" + * </ul> + * + * <p>
    Such field names can occur in disabled object fields (enabled: false) which bypass + * OpenSearch's field name validation. Normal OpenSearch indices reject these field names. + * + * @param fieldName The field name to check. + * @return true if the field name is malformed, false otherwise. + */ + static boolean isFieldNameMalformed(String fieldName) { + // Use -1 limit to preserve trailing empty strings (e.g., "a." -> ["a", ""]) + String[] parts = fieldName.split("\\.", -1); + // Dot-only field names produce empty array + if (parts.length == 0) { + return true; + } + // Check for empty parts which indicate leading, trailing, or consecutive dots + for (String part : parts) { + if (part.isEmpty()) { + return true; + } + } + return false; + } + /** * Populate the current ExprTupleValue recursively. * diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 32ba07d4d53..0734613e522 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -1069,6 +1069,114 @@ private ExprValue constructFromObjectWithArraySupport(String fieldName, Object v return exprValueFactory.construct(fieldName, value, true); } + // ==================== Malformed Field Name Tests ==================== + // Tests for issue #4896: ArrayIndexOutOfBoundsException with dot-containing field names + + @Test + public void isFieldNameMalformed_dotOnlyFieldNames() { + // Single dot + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed(".")); + // Multiple dots + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("..")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("...")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("....")); + } + + @Test + public void isFieldNameMalformed_leadingDots() { + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed(".a")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("..a")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed(".field")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("..field.name")); + } + + @Test + public void isFieldNameMalformed_trailingDots() { + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a.")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a..")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("field.")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("field.name..")); + } + + @Test + public void isFieldNameMalformed_consecutiveDots() { + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a..b")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a...b")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("field..name")); + assertTrue(OpenSearchExprValueFactory.isFieldNameMalformed("a..b..c")); + } + + @Test + public void isFieldNameMalformed_validFieldNames() { + // Simple field names + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("a")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("field")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("fieldName")); + // Nested field names (valid dot usage) + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("a.b")); + 
assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("log.json")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("a.b.c")); + assertFalse(OpenSearchExprValueFactory.isFieldNameMalformed("field.name.value")); + } + + @Test + public void constructStructWithDotOnlyFieldName_returnsNull() { + // Test that a single dot field name returns null + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\".\":\"value\"}}").get("structV").tupleValue().get(".")); + } + + @Test + public void constructStructWithMultipleDotFieldName_returnsNull() { + // Test that multiple dot field names return null + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"..\":\"value\"}}").get("structV").tupleValue().get("..")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"...\":\"value\"}}").get("structV").tupleValue().get("...")); + } + + @Test + public void constructStructWithLeadingDotFieldName_returnsNull() { + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\".a\":\"value\"}}").get("structV").tupleValue().get(".a")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"..a\":\"value\"}}").get("structV").tupleValue().get("..a")); + } + + @Test + public void constructStructWithTrailingDotFieldName_returnsNull() { + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a.\":\"value\"}}").get("structV").tupleValue().get("a.")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a..\":\"value\"}}").get("structV").tupleValue().get("a..")); + } + + @Test + public void constructStructWithConsecutiveDotsFieldName_returnsNull() { + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a..b\":\"value\"}}").get("structV").tupleValue().get("a..b")); + assertEquals( + nullValue(), + tupleValue("{\"structV\":{\"a...b\":\"value\"}}").get("structV").tupleValue().get("a...b")); + } + + @Test + public void constructStructWithMalformedAndValidFields_preservesValidFields() { + // Test that valid fields are preserved when malformed fields are present + Map structValue = + tupleValue("{\"structV\":{\".\":\"bad\",\"good\":\"value\"}}").get("structV").tupleValue(); + assertEquals(nullValue(), structValue.get(".")); + assertEquals(stringValue("value"), structValue.get("good")); + } + @EqualsAndHashCode(callSuper = false) @ToString private static class TestType extends OpenSearchDataType { From 37a025671bd45ddbb8cf10eab01b7e900a830c9b Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Mon, 8 Dec 2025 16:45:14 +0800 Subject: [PATCH 94/99] Support composite aggregation paginating (#4884) * Support composite aggregation paginating in HAVING clause Signed-off-by: Lantao Jin * typo Signed-off-by: Lantao Jin * refactor Signed-off-by: Lantao Jin * refactor Signed-off-by: Lantao Jin * Fix IT Signed-off-by: Lantao Jin * Fix doctest and IT Signed-off-by: Lantao Jin * security it Signed-off-by: Lantao Jin * revert changes in OpenSearchIndexScan Signed-off-by: Lantao Jin * Fix compile error Signed-off-by: Lantao Jin * Fix v2 paginationIT Signed-off-by: Lantao Jin * optimize request total size in composite agg Signed-off-by: Lantao Jin * fix it Signed-off-by: Lantao Jin * Refactor Signed-off-by: Lantao Jin --------- Signed-off-by: Lantao Jin --- .../sql/calcite/plan/LogicalSystemLimit.java | 14 +- .../calcite/utils/PPLHintStrategyTable.java | 33 +++++ .../sql/calcite/utils/PlanUtils.java | 12 +- .../NonFallbackCalciteException.java | 4 + docs/user/optimization/optimization.rst | 22 +-- docs/user/ppl/cmd/explain.rst | 2 +- .../sql/calcite/remote/CalciteExplainIT.java | 119 
+++++++++++++++- .../remote/CalcitePPLAggregationIT.java | 10 ++ .../CalcitePPLAggregationPaginatingIT.java | 23 +++ .../calcite/remote/CalciteStatsCommandIT.java | 92 ++++++++++++ .../standalone/CalcitePPLIntegTestCase.java | 1 + .../tpch/CalcitePPLTpchPaginatingIT.java | 25 ++++ .../org/opensearch/sql/ppl/StandaloneIT.java | 1 + .../org/opensearch/sql/util/MatcherUtils.java | 13 +- .../calcite/agg_case_cannot_push.yaml | 2 +- .../agg_case_composite_cannot_push.yaml | 2 +- .../calcite/agg_case_num_res_cannot_push.yaml | 2 +- .../agg_composite2_range_count_push.yaml | 2 +- ...agg_composite2_range_range_count_push.yaml | 2 +- ..._composite_autodate_range_metric_push.yaml | 2 +- .../agg_composite_date_range_push.yaml | 2 +- .../agg_composite_range_metric_push.yaml | 2 +- .../big5/composite_date_histogram_daily.yaml | 2 +- .../calcite/big5/composite_terms.yaml | 2 +- .../calcite/big5/composite_terms_keyword.yaml | 2 +- .../big5/date_histogram_hourly_agg.yaml | 2 +- .../big5/date_histogram_minute_agg.yaml | 2 +- .../calcite/big5/terms_significant_1.yaml | 2 +- .../calcite/big5/terms_significant_2.yaml | 2 +- .../calcite/chart_single_group_key.yaml | 2 +- .../calcite/chart_with_integer_span.yaml | 2 +- .../calcite/chart_with_limit.yaml | 2 +- .../calcite/chart_with_timestamp_span.yaml | 2 +- .../calcite/clickbench/q18.yaml | 2 +- .../calcite/clickbench/q28.yaml | 2 +- .../calcite/clickbench/q29.yaml | 2 +- .../calcite/clickbench/q43.yaml | 2 +- .../calcite/explain_agg_counts_by1.yaml | 2 +- .../calcite/explain_agg_counts_by2.yaml | 2 +- .../calcite/explain_agg_counts_by3.yaml | 2 +- .../calcite/explain_agg_counts_by4.yaml | 2 +- .../calcite/explain_agg_counts_by5.yaml | 2 +- .../calcite/explain_agg_counts_by6.yaml | 2 +- .../calcite/explain_agg_group_merge.yaml | 2 +- .../explain_agg_paginating_having1.yaml | 12 ++ .../explain_agg_paginating_having2.yaml | 13 ++ .../explain_agg_paginating_having3.yaml | 12 ++ .../explain_agg_paginating_head_from.yaml | 12 ++ ...ginating_head_size_query_bucket_size1.yaml | 10 ++ ...ginating_head_size_query_bucket_size2.yaml | 10 ++ ...ginating_head_size_query_bucket_size3.yaml | 10 ++ .../calcite/explain_agg_paginating_join1.yaml | 19 +++ .../calcite/explain_agg_paginating_join2.yaml | 21 +++ .../calcite/explain_agg_paginating_join3.yaml | 19 +++ .../calcite/explain_agg_paginating_join4.yaml | 20 +++ .../explain_agg_script_timestamp_push.yaml | 2 +- .../explain_agg_script_udt_arg_push.yaml | 2 +- ...ith_distinct_count_approx_enhancement.json | 2 +- .../calcite/explain_agg_with_script.yaml | 2 +- .../explain_agg_with_sum_enhancement.yaml | 2 +- .../calcite/explain_append_command.json | 6 - .../calcite/explain_append_command.yaml | 17 +++ .../calcite/explain_appendpipe_command.json | 2 +- .../calcite/explain_dedup_complex1.yaml | 2 +- .../calcite/explain_dedup_complex2.yaml | 2 +- .../calcite/explain_dedup_complex3.yaml | 2 +- .../calcite/explain_dedup_complex4.yaml | 2 +- .../explain_dedup_keepempty_false_push.yaml | 2 +- .../calcite/explain_dedup_push.yaml | 2 +- .../calcite/explain_earliest_latest.yaml | 2 +- .../explain_earliest_latest_custom_time.yaml | 2 +- .../calcite/explain_filter_agg_push.json | 0 .../calcite/explain_filter_agg_push.yaml | 2 +- .../calcite/explain_filter_with_search.yaml | 2 +- .../calcite/explain_first_last.yaml | 2 +- .../calcite/explain_limit_agg_pushdown.json | 2 +- .../calcite/explain_limit_agg_pushdown2.yaml | 2 +- .../calcite/explain_limit_agg_pushdown3.json | 2 +- .../calcite/explain_limit_agg_pushdown4.yaml | 2 +- 
...n_limit_agg_pushdown_bucket_nullable1.yaml | 2 +- ...n_limit_agg_pushdown_bucket_nullable2.yaml | 2 +- .../calcite/explain_output.yaml | 2 +- ...lain_patterns_simple_pattern_agg_push.yaml | 2 +- .../calcite/explain_script_push_on_text.yaml | 2 +- .../calcite/explain_sort_then_agg_push.json | 2 +- .../explain_stats_bins_on_time_and_term.yaml | 2 +- .../explain_stats_bins_on_time_and_term2.yaml | 2 +- .../calcite/explain_stats_by_span.json | 2 +- ...ain_stats_by_span_non_bucket_nullable.json | 2 +- .../calcite/explain_stats_by_timespan.yaml | 2 +- .../calcite/explain_stats_by_timespan2.yaml | 2 +- .../calcite/udf_geoip_in_agg_pushed.yaml | 2 +- .../explain_append_command.json | 6 - .../explain_append_command.yaml | 20 +++ .../ppl/big5/asc_sort_timestamp.yaml | 3 +- ...asc_sort_timestamp_can_match_shortcut.yaml | 4 +- ..._sort_timestamp_no_can_match_shortcut.yaml | 4 +- .../big5/asc_sort_with_after_timestamp.yaml | 3 +- .../ppl/big5/cardinality_agg_high.yaml | 4 +- .../ppl/big5/cardinality_agg_high_2.yaml | 4 +- .../ppl/big5/cardinality_agg_low.yaml | 4 +- .../big5/composite_date_histogram_daily.yaml | 4 +- .../ppl/big5/composite_terms.yaml | 5 +- .../ppl/big5/composite_terms_keyword.yaml | 5 +- .../ppl/big5/date_histogram_hourly_agg.yaml | 4 +- .../ppl/big5/date_histogram_minute_agg.yaml | 4 +- .../expectedOutput/ppl/big5/default.yaml | 3 +- .../ppl/big5/desc_sort_timestamp.yaml | 3 +- ...esc_sort_timestamp_can_match_shortcut.yaml | 4 +- ..._sort_timestamp_no_can_match_shortcut.yaml | 4 +- .../big5/desc_sort_with_after_timestamp.yaml | 3 +- .../ppl/big5/keyword_in_range.yaml | 3 +- .../ppl/big5/keyword_terms.yaml | 5 +- .../big5/keyword_terms_low_cardinality.yaml | 5 +- .../ppl/big5/multi_terms_keyword.yaml | 5 +- .../ppl/big5/query_string_on_message.yaml | 4 +- .../query_string_on_message_filtered.yaml | 3 +- ...string_on_message_filtered_sorted_num.yaml | 3 +- .../expectedOutput/ppl/big5/range.yaml | 4 +- .../expectedOutput/ppl/big5/range_agg_1.yaml | 3 +- .../expectedOutput/ppl/big5/range_agg_2.yaml | 3 +- .../ppl/big5/range_auto_date_histo.yaml | 3 +- .../range_auto_date_histo_with_metrics.yaml | 3 +- ..._conjunction_big_range_big_term_query.yaml | 4 +- ...onjunction_small_range_big_term_query.yaml | 3 +- ...junction_small_range_small_term_query.yaml | 4 +- ...isjunction_big_range_small_term_query.yaml | 4 +- .../ppl/big5/range_numeric.yaml | 4 +- .../ppl/big5/range_with_asc_sort.yaml | 4 +- .../ppl/big5/range_with_desc_sort.yaml | 4 +- .../expectedOutput/ppl/big5/scroll.yaml | 3 +- .../big5/sort_keyword_can_match_shortcut.yaml | 4 +- .../sort_keyword_no_can_match_shortcut.yaml | 4 +- .../ppl/big5/sort_numeric_asc.yaml | 3 +- .../ppl/big5/sort_numeric_asc_with_match.yaml | 4 +- .../ppl/big5/sort_numeric_desc.yaml | 3 +- .../big5/sort_numeric_desc_with_match.yaml | 4 +- .../expectedOutput/ppl/big5/term.yaml | 4 +- .../ppl/big5/terms_significant_1.yaml | 5 +- .../ppl/big5/terms_significant_2.yaml | 3 +- .../ppl/explain_filter_agg_push.yaml | 4 +- ...plain_filter_push_compare_date_string.yaml | 3 +- ...plain_filter_push_compare_time_string.yaml | 3 +- ..._filter_push_compare_timestamp_string.yaml | 3 +- .../ppl/explain_filter_then_limit_push.yaml | 3 +- ...eyword_like_function_case_insensitive.yaml | 3 +- .../ppl/explain_limit_10_5_push.yaml | 4 +- .../ppl/explain_limit_10_filter_5_push.yaml | 5 +- .../explain_limit_10from1_10from2_push.yaml | 4 +- .../ppl/explain_limit_5_10_push.yaml | 4 +- .../ppl/explain_limit_offsets_push.yaml | 4 +- .../ppl/explain_limit_push.yaml | 5 +- 
.../ppl/explain_limit_then_filter_push.yaml | 5 +- .../ppl/explain_limit_then_sort_push.yaml | 4 +- .../expectedOutput/ppl/explain_output.yaml | 5 +- .../ppl/explain_patterns_simple_pattern.yaml | 3 +- ...lain_patterns_simple_pattern_agg_push.yaml | 5 +- .../ppl/explain_sort_count_push.yaml | 4 +- .../ppl/explain_sort_then_limit_push.yaml | 4 +- .../ppl/explain_stats_by_timespan.yaml | 4 +- .../ppl/explain_stats_by_timespan2.yaml | 4 +- .../expectedOutput/ppl/explain_take.yaml | 5 +- .../ppl/explain_text_like_function.yaml | 3 +- ...n_text_like_function_case_insensitive.yaml | 3 +- .../ppl/explain_trendline_push.yaml | 5 +- .../ppl/explain_trendline_sort_push.yaml | 5 +- .../opensearch/request/AggregateAnalyzer.java | 7 +- .../request/OpenSearchQueryRequest.java | 131 +++++++++++++----- .../request/OpenSearchRequestBuilder.java | 26 +++- .../request/OpenSearchScrollRequest.java | 2 +- .../response/OpenSearchResponse.java | 49 +++++-- .../opensearch/storage/OpenSearchIndex.java | 17 ++- .../scan/AbstractCalciteIndexScan.java | 32 +---- .../storage/scan/BackgroundSearchScanner.java | 41 +++++- .../scan/CalciteEnumerableIndexScan.java | 1 + .../storage/scan/CalciteLogicalIndexScan.java | 50 ++++--- .../scan/OpenSearchIndexEnumerator.java | 12 +- .../scan/context/AggPushDownAction.java | 13 +- .../context/AggregationBuilderAction.java | 2 +- .../scan/context/OSRequestBuilderAction.java | 2 +- .../storage/scan/context/PushDownContext.java | 24 ++-- .../client/OpenSearchNodeClientTest.java | 2 +- .../client/OpenSearchRestClientTest.java | 4 +- .../request/OpenSearchQueryRequestTest.java | 48 +++---- .../request/OpenSearchRequestBuilderTest.java | 34 ++--- .../response/OpenSearchResponseTest.java | 31 +++-- .../scan/BackgroundSearchScannerTest.java | 21 +-- .../OpenSearchIndexScanPaginationTest.java | 2 +- .../storage/scan/OpenSearchIndexScanTest.java | 8 +- .../calcite/CalcitePPLAggregationTest.java | 81 +++++++++++ 190 files changed, 1120 insertions(+), 503 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintStrategyTable.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationPaginatingIT.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchPaginatingIT.java create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having3.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_from.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size3.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join2.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml create mode 100644 
integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.yaml delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json delete mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.json create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.yaml diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java b/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java index 6e46b63b976..84c448c8387 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/LogicalSystemLimit.java @@ -39,17 +39,6 @@ public enum SystemLimitType { @Getter private final SystemLimitType type; - private LogicalSystemLimit( - SystemLimitType type, - RelOptCluster cluster, - RelTraitSet traitSet, - RelNode input, - RelCollation collation, - @Nullable RexNode offset, - @Nullable RexNode fetch) { - this(type, cluster, traitSet, Collections.emptyList(), input, collation, offset, fetch); - } - private LogicalSystemLimit( SystemLimitType type, RelOptCluster cluster, @@ -76,7 +65,8 @@ public static LogicalSystemLimit create( RelCollation collation = collations == null ? null : collations.get(0); collation = RelCollationTraitDef.INSTANCE.canonize(collation); RelTraitSet traitSet = input.getTraitSet().replace(Convention.NONE).replace(collation); - return new LogicalSystemLimit(type, cluster, traitSet, input, collation, offset, fetch); + return new LogicalSystemLimit( + type, cluster, traitSet, Collections.emptyList(), input, collation, offset, fetch); } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintStrategyTable.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintStrategyTable.java new file mode 100644 index 00000000000..84a8b437887 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PPLHintStrategyTable.java @@ -0,0 +1,33 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import com.google.common.base.Suppliers; +import java.util.function.Supplier; +import lombok.experimental.UtilityClass; +import org.apache.calcite.rel.hint.HintStrategyTable; +import org.apache.calcite.rel.logical.LogicalAggregate; + +@UtilityClass +public class PPLHintStrategyTable { + + private static final Supplier HINT_STRATEGY_TABLE = + Suppliers.memoize( + () -> + HintStrategyTable.builder() + .hintStrategy( + "stats_args", + (hint, rel) -> { + return rel instanceof LogicalAggregate; + }) + // add more here + .build()); + + /** Update the HINT_STRATEGY_TABLE when you create a new hint. 
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 50e03fc608f..f00feed651e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -29,7 +29,6 @@ import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.hint.HintStrategyTable; import org.apache.calcite.rel.hint.RelHint; import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalFilter; @@ -605,16 +604,7 @@ static void addIgnoreNullBucketHintToAggregate(RelBuilder relBuilder) { assert relBuilder.peek() instanceof LogicalAggregate : "Stats hits should be added to LogicalAggregate"; relBuilder.hints(statHits); - relBuilder - .getCluster() - .setHintStrategies( - HintStrategyTable.builder() - .hintStrategy( - "stats_args", - (hint, rel) -> { - return rel instanceof LogicalAggregate; - }) - .build()); + relBuilder.getCluster().setHintStrategies(PPLHintStrategyTable.getHintStrategyTable()); } /** Extract the RexLiteral from the aggregate call if the aggregate call is a LITERAL_AGG. */ diff --git a/core/src/main/java/org/opensearch/sql/exception/NonFallbackCalciteException.java b/core/src/main/java/org/opensearch/sql/exception/NonFallbackCalciteException.java index e11583178be..fae0af228dd 100644 --- a/core/src/main/java/org/opensearch/sql/exception/NonFallbackCalciteException.java +++ b/core/src/main/java/org/opensearch/sql/exception/NonFallbackCalciteException.java @@ -11,4 +11,8 @@ public class NonFallbackCalciteException extends QueryEngineException { public NonFallbackCalciteException(String message) { super(message); } + + public NonFallbackCalciteException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/docs/user/optimization/optimization.rst b/docs/user/optimization/optimization.rst index f60b51bddac..0af19e9f9ed 100644 --- a/docs/user/optimization/optimization.rst +++ b/docs/user/optimization/optimization.rst @@ -44,7 +44,7 @@ The consecutive Filter operator will be merged as one Filter operator:: { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, searchDone=false)" + "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}})" }, "children": [] } @@ -71,7 +71,7 @@ The Filter operator should be push down under Sort operator:: { "name": "OpenSearchIndexScan", "description": { - "request":
"OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, searchDone=false)" + "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]})" }, "children": [] } @@ -102,7 +102,7 @@ The Project list will push down to Query DSL to `filter the source 10")); + expected = loadExpectedPlan("explain_agg_paginating_having2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats bucket_nullable = false count() by" + + " state | where `count()` > 10")); + expected = loadExpectedPlan("explain_agg_paginating_having3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats avg(balance) as avg, count() as cnt" + + " by state | eval new_avg = avg + 1000, new_cnt = cnt + 1 | where new_avg >" + + " 1000 or new_cnt > 1")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testPaginatingAggForJoin() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + try { + setQueryBucketSize(2); + String expected = loadExpectedPlan("explain_agg_paginating_join1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats count() as c by state | join left=l" + + " right=r on l.state=r.state [ source=opensearch-sql_test_index_bank | stats" + + " count() as c by state ]")); + expected = loadExpectedPlan("explain_agg_paginating_join2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats bucket_nullable = false count() as" + + " c by state | join left=l right=r on l.state=r.state [" + + " source=opensearch-sql_test_index_bank | stats bucket_nullable = false" + + " count() as c by state ]")); + expected = loadExpectedPlan("explain_agg_paginating_join3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats count() as c by state | join" + + " type=inner state [ source=opensearch-sql_test_index_bank | stats count()" + + " as c by state ]")); + expected = loadExpectedPlan("explain_agg_paginating_join4.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats count() as c by state | head 10" + + " | join type=inner state [ source=opensearch-sql_test_index_account" + + " | stats count() as c by state ]")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testPaginatingAggForHeadFrom() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + try { + setQueryBucketSize(2); + String expected = loadExpectedPlan("explain_agg_paginating_head_from.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account | stats count() as c by state | head 10" + + " from 2")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testPaginatingHeadSizeNoLessThanQueryBucketSize() throws 
IOException { + enabledOnlyWhenPushdownIsEnabled(); + try { + setQueryBucketSize(2); + String expected = + loadExpectedPlan("explain_agg_paginating_head_size_query_bucket_size1.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | stats count() by age | sort -age | head 3", TEST_INDEX_BANK))); + expected = loadExpectedPlan("explain_agg_paginating_head_size_query_bucket_size2.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | stats count() by age | sort -age | head 2", TEST_INDEX_BANK))); + expected = loadExpectedPlan("explain_agg_paginating_head_size_query_bucket_size3.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + String.format( + "source=%s | stats count() by age | sort -age | head 1", TEST_INDEX_BANK))); + } finally { + resetQueryBucketSize(); + } + } + @Test public void testExplainSortOnMeasure() throws IOException { enabledOnlyWhenPushdownIsEnabled(); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index e0872dc543c..9710d2f4415 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -1267,6 +1267,16 @@ public void testLimitAfterAggregation() throws IOException { "source=%s | stats count() by age | sort -age | head 3", TEST_INDEX_BANK)); verifySchema(response, schema("count()", "bigint"), schema("age", "int")); verifyDataRows(response, rows(1, 39), rows(2, 36), rows(1, 34)); + response = + executeQuery( + String.format( + "source=%s | stats count() by age | sort -age | head 2", TEST_INDEX_BANK)); + verifyDataRows(response, rows(1, 39), rows(2, 36)); + response = + executeQuery( + String.format( + "source=%s | stats count() by age | sort -age | head 1", TEST_INDEX_BANK)); + verifyDataRows(response, rows(1, 39)); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationPaginatingIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationPaginatingIT.java new file mode 100644 index 00000000000..4fef36ce76a --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationPaginatingIT.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import org.junit.After; + +public class CalcitePPLAggregationPaginatingIT extends CalcitePPLAggregationIT { + + @Override + public void init() throws Exception { + super.init(); + setQueryBucketSize(2); + } + + @After + public void tearDown() throws Exception { + resetQueryBucketSize(); + super.tearDown(); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStatsCommandIT.java index 19f95ec6c4e..8172bf81e62 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStatsCommandIT.java @@ -5,6 +5,14 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static 
org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; import org.opensearch.sql.ppl.StatsCommandIT; public class CalciteStatsCommandIT extends StatsCommandIT { @@ -14,4 +22,88 @@ public void init() throws Exception { enableCalcite(); setQuerySizeLimit(2000); } + + @Test + public void testPaginatingStatsForHaving() throws IOException { + try { + setQueryBucketSize(2); + JSONObject response = + executeQuery( + String.format( + "source=%s | stats sum(balance) as a by state | where a > 780000", + TEST_INDEX_ACCOUNT)); + verifyDataRows(response, rows(782199, "TX")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testPaginatingStatsForJoin() throws IOException { + try { + setQueryBucketSize(2); + JSONObject response = + executeQuery( + String.format( + "source=%s | stats sum(balance) as a by state | join left=l right=r on l.state =" + " r.state [ source = %s | stats sum(balance) as a by state ]", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + verifyDataRows( + response, + rows(648774, "IL", 39225, "IL"), + rows(346934, "IN", 48086, "IN"), + rows(732523, "MD", 4180, "MD"), + rows(531785, "PA", 40540, "PA"), + rows(709135, "TN", 5686, "TN"), + rows(489601, "VA", 32838, "VA"), + rows(483741, "WA", 16418, "WA")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testPaginatingStatsForJoinField() throws IOException { + try { + setQueryBucketSize(2); + JSONObject response = + executeQuery( + String.format( + "source=%s | stats sum(balance) as a by state | join type=inner state " + "[ source = %s | stats sum(balance) as a by state ]", + TEST_INDEX_ACCOUNT, TEST_INDEX_BANK)); + verifyDataRows( + response, + rows(39225, "IL"), + rows(48086, "IN"), + rows(4180, "MD"), + rows(40540, "PA"), + rows(5686, "TN"), + rows(32838, "VA"), + rows(16418, "WA")); + } finally { + resetQueryBucketSize(); + } + } + + @Test + public void testPaginatingStatsForHeadFrom() throws IOException { + try { + setQueryBucketSize(2); + JSONObject response = + executeQuery( + String.format( + "source=%s | stats sum(balance) as a by state | sort - a | head 5 from 2", + TEST_INDEX_ACCOUNT)); + verifyDataRows( + response, + rows(710408, "MA"), + rows(709135, "TN"), + rows(657957, "ID"), + rows(648774, "IL"), + rows(643489, "AL")); + } finally { + resetQueryBucketSize(); + } + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java index a607dc39f2b..29c84a947aa 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java @@ -113,6 +113,7 @@ private Settings defaultSettings() { new ImmutableMap.Builder<Key, Object>() .put(Key.QUERY_SIZE_LIMIT, 200) .put(Key.QUERY_BUCKET_SIZE, 1000) + .put(Key.SEARCH_MAX_BUCKETS, 65535) .put(Key.SQL_CURSOR_KEEP_ALIVE, TimeValue.timeValueMinutes(1)) .put(Key.FIELD_TYPE_TOLERANCE, true) .put(Key.CALCITE_ENGINE_ENABLED, true) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchPaginatingIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchPaginatingIT.java new file mode 100644 index 00000000000..a9de5f732ae --- /dev/null +++ 
b/integ-test/src/test/java/org/opensearch/sql/calcite/tpch/CalcitePPLTpchPaginatingIT.java @@ -0,0 +1,25 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.tpch; + +import org.junit.After; +import org.opensearch.sql.util.Retry; + +@Retry +public class CalcitePPLTpchPaginatingIT extends CalcitePPLTpchIT { + + @Override + public void init() throws Exception { + super.init(); + setQueryBucketSize(2); + } + + @After + public void tearDown() throws Exception { + resetQueryBucketSize(); + super.tearDown(); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java index 062880411ef..2d120ffc1e5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java @@ -167,6 +167,7 @@ private Settings defaultSettings() { new ImmutableMap.Builder<Key, Object>() .put(Key.QUERY_SIZE_LIMIT, 200) .put(Key.QUERY_BUCKET_SIZE, 1000) + .put(Key.SEARCH_MAX_BUCKETS, 65535) .put(Key.SQL_CURSOR_KEEP_ALIVE, TimeValue.timeValueMinutes(1)) .put(Key.FIELD_TYPE_TOLERANCE, true) .build();
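As a rough illustration of the normalization the MatcherUtils change below performs before comparing explain output (the plan fragment here is hypothetical, not taken from a real response):

// Volatile request options are stripped and the PIT id is masked, so the
// expected and actual plan strings compare equal across runs.
String raw =
    "OpenSearchQueryRequest(indexName=accounts, needClean=true, searchDone=false, pitId=a1b2c3,)";
String cleaned =
    raw.replaceAll(" needClean=true,", "")
        .replaceAll(" searchDone=false,", "")
        .replaceAll("pitId=[^,]+,", "pitId=*,");
// cleaned is "OpenSearchQueryRequest(indexName=accounts, pitId=*,)"

diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index bf9b214bd44..079e332ef32 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -421,7 +421,7 @@ public static void assertJsonEqualsIgnoreId(String expected, String actual) { } private static String cleanUpId(String s) { - return eliminateTimeStamp(eliminatePid(eliminateRelId(s))); + return eliminateTimeStamp(eliminatePid(eliminateRelId(eliminateRequestOptions(s)))); } private static String eliminateTimeStamp(String s) { @@ -434,14 +434,17 @@ private static String eliminateRelId(String s) { .replaceAll("LogicalProject#\\d+", "LogicalProject#"); } + private static String eliminateRequestOptions(String s) { + return s.replaceAll(" needClean=true,", "").replaceAll(" searchDone=false,", ""); + } + private static String eliminatePid(String s) { return s.replaceAll("pitId=[^,]+,", "pitId=*,"); } /** Compare two YAML strings for equality, ignoring the RelNode id in the Calcite plan. 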
*/ public static void assertYamlEqualsIgnoreId(String expectedYaml, String actualYaml) { - String cleanedYaml = cleanUpYaml(actualYaml); - assertYamlEquals(expectedYaml, cleanedYaml); + assertYamlEquals(cleanUpYaml(expectedYaml), cleanUpYaml(actualYaml)); } public static void assertYamlEquals(String expected, String actual) { @@ -460,7 +463,9 @@ private static String cleanUpYaml(String s) { .replaceAll("rel#\\d+", "rel#") .replaceAll("RelSubset#\\d+", "RelSubset#") .replaceAll("LogicalProject#\\d+", "LogicalProject#") - .replaceAll("pitId=[^,]+,", "pitId=*,"); + .replaceAll("pitId=[^,]+,", "pitId=*,") + .replaceAll(" needClean=true,", "") + .replaceAll(" searchDone=false,", ""); } private static String jsonToYaml(String json) { diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml index c9929d83e81..b7405bf3963 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGFHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNFQVJDSCIsCiAgICAgICAgImtpbmQiOiAiU0VBUkNIIiwKICAgICAgICAic3ludGF4IjogIklOVEVSTkFMIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgImNsb3NlZCIsCiAgICAgICAgICAgICAgICAiMzAiLAogICAgICAgICAgICAgICAgIjQwIgogICAgICAgICAgICAgIF0KICAgICAgICAgICAgXSwKICAgICAgICAgICAgIm51bGxBcyI6ICJVTktOT1dOIgogICAgICAgICAgfSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5
hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,0,2,2],"DIGESTS":["age",30,"u30","age","u40","u100"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQGFHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNFQVJDSCIsCiAgICAgICAgImtpbmQiOiAiU0VBUkNIIiwKICAgICAgICAic3ludGF4IjogIklOVEVSTkFMIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgImNsb3NlZCIsCiAgICAgICAgICAgICAgICAiMzAiLAogICAgICAgICAgICAgICAgIjQwIgogICAgICAgICAgICAgIF0KICAgICAgICAgICAgXSwKICAgICAgICAgICAgIm51bGxBcyI6ICJVTktOT1dOIgogICAgICAgICAgfSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiA0LAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,0,2,2],"DIGESTS":["age",30,"u30","age","u40","u100"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff 
--git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml index 20199016fb4..9ce326d7c7d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDJXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,1],"DIGESTS":["age",35,"u35","email"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQDJXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,1],"DIGESTS":["age",35,"u35","email"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml index f50f5e44582..d4670d6dede 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC9HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,2],"DIGESTS":["age",30,30,100]}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC9HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2,2],"DIGESTS":["age",30,30,100]}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml index 353bcf5c1e9..6d12ca7285e 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], state=[$9], gender=[$4], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg(balance)=AVG($3),count()=COUNT()), PROJECT->[avg(balance), count(), age_range, state, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg(balance)=AVG($3),count()=COUNT()), PROJECT->[avg(balance), count(), age_range, state, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml index eef2a7b23f8..4bbafd2928a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, 'a35':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], state=[$9], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg_balance=AVG($3)), PROJECT->[avg_balance, age_range, balance_range, state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u35","to":35.0},{"key":"a35","from":35.0}],"keyed":true},"aggregations":{"balance_range":{"range":{"field":"balance","ranges":[{"key":"medium","to":20000.0},{"key":"high","from":20000.0}],"keyed":true},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg_balance=AVG($3)), PROJECT->[avg_balance, age_range, balance_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u35","to":35.0},{"key":"a35","from":35.0}],"keyed":true},"aggregations":{"balance_range":{"range":{"field":"balance","ranges":[{"key":"medium","to":20000.0},{"key":"high","from":20000.0}],"keyed":true},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml index 14ee0aff97b..0c78841e2a1 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(@timestamp=[$0], category=[$1], value=[$2], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], timestamp=[WIDTH_BUCKET($3, 3, -(MAX($3) OVER (), MIN($3) OVER ()), MAX($3) OVER ())], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'great':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg(value)=AVG($3),count()=COUNT()), PROJECT->[avg(value), count(), timestamp, value_range, category], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 
2},avg(value)=AVG($3),count()=COUNT()), PROJECT->[avg(value), count(), timestamp, value_range, category], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml index 30e4762d325..3c116fc7b44 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml index 065598bc82c..605111dfc86 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(state=[$9], age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), state, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), state, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml index 56dec15223d..6f0e2fdae76 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_date_histogram_daily.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2022-12-30 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-07 12:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + 
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2022-12-30 00:00:00':VARCHAR..'2023-01-07 12:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1d)], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2022-12-30T00:00:00.000Z","to":"2023-01-07T12:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"span(`@timestamp`,1d)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml index 6d3ef26ee3e..9be838e68b8 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, cloud.region], SORT->[1 DESC LAST, 2 ASC FIRST], LIMIT->10, LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml index 2b48025c015..9cab2b0de11 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/composite_terms_keyword.yaml @@ -8,4 +8,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-02 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-02 10:00:00':VARCHAR)), IS NOT NULL($7), IS NOT NULL($14), IS NOT NULL($34))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, cloud.region, @timestamp, aws.cloudwatch.log_stream], FILTER->AND(SEARCH($2, Sarg[['2023-01-02 00:00:00':VARCHAR..'2023-01-02 10:00:00':VARCHAR)]:VARCHAR), IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},count()=COUNT()), PROJECT->[count(), process.name, cloud.region, 
aws.cloudwatch.log_stream], SORT->[1 DESC LAST, 2 ASC FIRST, 3 ASC FIRST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"@timestamp":{"from":"2023-01-02T00:00:00.000Z","to":"2023-01-02T10:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"process.name","boost":1.0}},{"exists":{"field":"cloud.region","boost":1.0}},{"exists":{"field":"aws.cloudwatch.log_stream","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["process.name","cloud.region","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":false,"order":"desc"}}},{"cloud.region":{"terms":{"field":"cloud.region","missing_bucket":false,"order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml index a97ca073a21..2982377d2ca 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_hourly_agg.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($17)]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1h)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml index 3d5639f94c0..7bccacdf5af 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/date_histogram_minute_agg.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], 
PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[@timestamp], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(`@timestamp`,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["@timestamp"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"span(`@timestamp`,1m)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml
index 2f3aab7b147..6e5b68f495c 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_1.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
       CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), aws.cloudwatch.log_stream, process.name], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), aws.cloudwatch.log_stream, process.name], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml
index cf04d9b8695..5d9f9d1f579 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/big5/terms_significant_2.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[AND(>=($17, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($17, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
       CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, aws.cloudwatch.log_stream], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[process.name, @timestamp, aws.cloudwatch.log_stream], FILTER->SEARCH($1, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), process.name, aws.cloudwatch.log_stream], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"@timestamp":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["process.name","@timestamp","aws.cloudwatch.log_stream"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"process.name":{"terms":{"field":"process.name","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"aws.cloudwatch.log_stream":{"terms":{"field":"aws.cloudwatch.log_stream","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml
index b011edc42f5..f8200b10a81 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_single_group_key.yaml
@@ -7,4 +7,4 @@ calcite:
     LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))])
       CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[FILTER->AND(IS NOT NULL($4), IS NOT NULL($7)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[FILTER->AND(IS NOT NULL($4), IS NOT NULL($7)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"gender","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml
index c47fb9dd438..4fcc802063f 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_integer_span.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($7))])
       CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},max(balance)=MAX($0)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"age","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age0":{"histogram":{"field":"age","missing_bucket":false,"order":"asc","interval":10.0}}}]},"aggregations":{"max(balance)":{"max":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance, age], FILTER->AND(IS NOT NULL($1), IS NOT NULL($0)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},max(balance)=MAX($0)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"age","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["balance","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age0":{"histogram":{"field":"age","missing_bucket":false,"order":"asc","interval":10.0}}}]},"aggregations":{"max(balance)":{"max":{"field":"balance"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml
index 389825459df..e4578f21fcd 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_limit.yaml
@@ -7,4 +7,4 @@ calcite:
     LogicalFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($7))])
       CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[gender, balance, state], FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"state","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["gender","balance","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[gender, balance, state], FILTER->AND(IS NOT NULL($2), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"state","boost":1.0}},{"exists":{"field":"balance","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["gender","balance","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml
index a07c92033d0..8768b05a1a0 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/chart_with_timestamp_span.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[IS NOT NULL($0)])
       CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count=COUNT()), SORT->[0], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"@timestamp0":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml
index 59742c27ae9..7b3603d66aa 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q18.yaml
@@ -8,4 +8,4 @@ calcite:
     LogicalFilter(condition=[AND(IS NOT NULL($84), IS NOT NULL($63))])
       CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"UserID":{"terms":{"field":"UserID","missing_bucket":false,"order":"asc"}}},{"SearchPhrase":{"terms":{"field":"SearchPhrase","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), UserID, SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10,"sources":[{"UserID":{"terms":{"field":"UserID","missing_bucket":false,"order":"asc"}}},{"SearchPhrase":{"terms":{"field":"SearchPhrase","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml
index 2bf72da9393..fbc4dd965dd 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q28.yaml
@@ -13,4 +13,4 @@ calcite:
     EnumerableLimit(fetch=[25])
       EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last])
         EnumerableCalc(expr#0..2=[{inputs}], expr#3=[100000], expr#4=[>($t1, $t3)], proj#0..2=[{exprs}], $condition=[$t4])
-          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, CounterID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($1),c=COUNT()), PROJECT->[l, c, CounterID]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"CounterID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"CounterID":{"terms":{"field":"CounterID","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["URL"]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, CounterID], FILTER->AND(<>($0, ''), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($1),c=COUNT()), PROJECT->[l, c, CounterID]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"CounterID","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"CounterID":{"terms":{"field":"CounterID","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["URL"]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml
index 97fdce0a1c0..08776c92111 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q29.yaml
@@ -15,4 +15,4 @@ calcite:
     EnumerableLimit(fetch=[25])
       EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last])
         EnumerableCalc(expr#0..3=[{inputs}], expr#4=[100000], expr#5=[>($t1, $t4)], proj#0..3=[{exprs}], $condition=[$t5])
-          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[Referer], FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($2),c=COUNT(),min(Referer)=MIN($1)), PROJECT->[l, c, min(Referer), k]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"Referer","boost":1.0}}],"must_not":[{"term":{"Referer":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["Referer"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"k":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDIKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["Referer","^https?://(?:www\\.)?([^/]+)/.*$","$1"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["Referer"]}}}},"min(Referer)":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"Referer"}],"sort":[{"Referer":{"order":"asc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[Referer], FILTER-><>($0, ''), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},l=AVG($2),c=COUNT(),min(Referer)=MIN($1)), PROJECT->[l, c, min(Referer), k]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"Referer","boost":1.0}}],"must_not":[{"term":{"Referer":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["Referer"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"k":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDIKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["Referer","^https?://(?:www\\.)?([^/]+)/.*$","$1"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"l":{"avg":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["Referer"]}}}},"min(Referer)":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"Referer"}],"sort":[{"Referer":{"order":"asc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
index 820898e401d..d261f22d6f5 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/clickbench/q43.yaml
@@ -11,4 +11,4 @@ calcite:
     EnumerableCalc(expr#0..1=[{inputs}], PageViews=[$t1], M=[$t0])
       EnumerableLimit(fetch=[10000])
        EnumerableLimit(offset=[1000], fetch=[10])
-          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"exists":{"field":"EventTime","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+          CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[EventDate, EventTime, DontCountHits, IsRefresh, CounterID], FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':VARCHAR..'2013-07-15 00:00:00':VARCHAR]]:VARCHAR), =($3, 0), =($2, 0), IS NOT NULL($1)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},PageViews=COUNT()), SORT->[0 ASC FIRST], LIMIT->[10 from 1000]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-15T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"exists":{"field":"EventTime","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["EventDate","EventTime","DontCountHits","IsRefresh","CounterID"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1010,"sources":[{"M":{"date_histogram":{"field":"EventTime","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=1010, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by1.yaml
index 722be1bec9f..e731c2d3433 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by1.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by1.yaml
@@ -7,4 +7,4 @@ calcite:
       CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
     EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], c1=[$t1], gender=[$t0])
-      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by2.yaml
index 61724171f7a..026d83de0e8 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by2.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by2.yaml
@@ -7,4 +7,4 @@ calcite:
       CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
     EnumerableCalc(expr#0..1=[{inputs}], c1=[$t1], c2=[$t1], gender=[$t0])
-      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c1=COUNT($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"c1":{"value_count":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c1=COUNT($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"c1":{"value_count":{"field":"balance"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by3.yaml
index a4dfdb25064..4e49ba34da0 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by3.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by3.yaml
@@ -6,4 +6,4 @@ calcite:
     LogicalProject(gender=[$4], account_number=[$0], account_number_alias=[$0])
      CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(account_number)=COUNT($1),c2=COUNT($2)), PROJECT->[count(account_number), c2, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(account_number)":{"value_count":{"field":"account_number"}},"c2":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(account_number)=COUNT($1),c2=COUNT($2)), PROJECT->[count(account_number), c2, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(account_number)":{"value_count":{"field":"account_number"}},"c2":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml
index 77fc6c6eadf..039d6668fd8 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml
@@ -6,4 +6,4 @@ calcite:
     LogicalProject(gender=[$4], account_number=[$0])
      CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT(),count(account_number)=COUNT($1)), PROJECT->[count(), count(account_number), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT(),count(account_number)=COUNT($1)), PROJECT->[count(), count(account_number), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by5.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by5.yaml
index dc7bf3629f2..40e1333630a 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by5.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by5.yaml
@@ -6,4 +6,4 @@ calcite:
     LogicalProject(gender=[$4], balance=[$3], account_number=[$0])
      CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(balance)=COUNT($1),count(account_number)=COUNT($2)), PROJECT->[count(balance), count(account_number), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(balance)":{"value_count":{"field":"balance"}},"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(balance)=COUNT($1),count(account_number)=COUNT($2)), PROJECT->[count(balance), count(account_number), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(balance)":{"value_count":{"field":"balance"}},"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml
index b0b23b7ce10..138cdec9369 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by6.yaml
@@ -6,4 +6,4 @@ calcite:
     LogicalProject(gender=[$4], b_1=[+($3, 1)], $f3=[POWER($3, 2)])
      CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
-    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(b_1)=COUNT($1),c3=COUNT($2)), PROJECT->[count(b_1), c3, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(b_1)":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",1]}}}},"c3":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBVXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJQT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",2]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count(b_1)=COUNT($1),c3=COUNT($2)), PROJECT->[count(b_1), c3, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(b_1)":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",1]}}}},"c3":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBVXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJQT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",2]}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_group_merge.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_group_merge.yaml
index aaf4f2017cb..acd95f0ec63 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_group_merge.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_group_merge.yaml
@@ -7,4 +7,4 @@ calcite:
       CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
   physical: |
     EnumerableCalc(expr#0..1=[{inputs}], expr#2=[10], expr#3=[*($t0, $t2)], expr#4=[+($t0, $t2)], count()=[$t1], age1=[$t3], age2=[$t4], age3=[$t2], age=[$t0])
-      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+      CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having1.yaml
new file mode 100644
index 00000000000..bc06e988e1f
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having1.yaml
@@ -0,0 +1,12 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalFilter(condition=[>($0, 10)])
+        LogicalProject(c=[$1], state=[$0])
+          LogicalAggregate(group=[{0}], c=[COUNT()])
+            LogicalProject(state=[$7])
+              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[10], expr#3=[>($t0, $t2)], proj#0..1=[{exprs}], $condition=[$t3])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having2.yaml
new file mode 100644
index 00000000000..8f1a667a604
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having2.yaml
@@ -0,0 +1,13 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalFilter(condition=[>($0, 10)])
+        LogicalProject(count()=[$1], state=[$0])
+          LogicalAggregate(group=[{0}], count()=[COUNT()])
+            LogicalProject(state=[$7])
+              LogicalFilter(condition=[IS NOT NULL($7)])
+                CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[10], expr#3=[>($t0, $t2)], proj#0..1=[{exprs}], $condition=[$t3])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having3.yaml
new file mode 100644
index 00000000000..e7589d8109d
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_having3.yaml
@@ -0,0 +1,12 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalFilter(condition=[OR(>($3, 1000), >($4, 1))])
+        LogicalProject(avg=[$1], cnt=[$2], state=[$0], new_avg=[+($1, 1000)], new_cnt=[+($2, 1)])
+          LogicalAggregate(group=[{0}], avg=[AVG($1)], cnt=[COUNT()])
+            LogicalProject(state=[$7], balance=[$3])
+              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1000], expr#4=[+($t1, $t3)], expr#5=[1], expr#6=[+($t2, $t5)], expr#7=[>($t4, $t3)], expr#8=[>($t6, $t5)], expr#9=[OR($t7, $t8)], avg=[$t1], cnt=[$t2], state=[$t0], new_avg=[$t4], new_cnt=[$t6], $condition=[$t9])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg=AVG($1),cnt=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_from.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_from.yaml
new file mode 100644
index 00000000000..cfe1a0a29df
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_from.yaml
@@ -0,0 +1,12 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalSort(offset=[2], fetch=[10])
+        LogicalProject(c=[$1], state=[$0])
+          LogicalAggregate(group=[{0}], c=[COUNT()])
+            LogicalProject(state=[$7])
+              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableLimit(offset=[2], fetch=[10])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size1.yaml
new file mode 100644
index 00000000000..5663ba56f9b
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size1.yaml
@@ -0,0 +1,10 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(sort0=[$1], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalSort(sort0=[$1], dir0=[DESC-nulls-last], fetch=[3])
+        LogicalProject(count()=[$1], age=[$0])
+          LogicalAggregate(group=[{0}], count()=[COUNT()])
+            LogicalProject(age=[$10])
+              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+  physical: |
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age], SORT->[1 DESC LAST], LIMIT->3, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"last","order":"desc"}}}]}}}}, requestedTotalSize=3, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size2.yaml
new file mode 100644
index 00000000000..c60fc184061
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size2.yaml
@@ -0,0 +1,10 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(sort0=[$1], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalSort(sort0=[$1], dir0=[DESC-nulls-last], fetch=[2])
+        LogicalProject(count()=[$1], age=[$0])
+          LogicalAggregate(group=[{0}], count()=[COUNT()])
+            LogicalProject(age=[$10])
+              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+  physical: |
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age], SORT->[1 DESC LAST], LIMIT->2, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"last","order":"desc"}}}]}}}}, requestedTotalSize=2, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size3.yaml
new file mode 100644
index 00000000000..835a72a6f55 100644
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_head_size_query_bucket_size3.yaml
@@ -0,0 +1,10 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(sort0=[$1], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalSort(sort0=[$1], dir0=[DESC-nulls-last], fetch=[1])
+        LogicalProject(count()=[$1], age=[$0])
+          LogicalAggregate(group=[{0}], count()=[COUNT()])
+            LogicalProject(age=[$10])
+              CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+  physical: |
+    CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age], SORT->[1 DESC LAST], LIMIT->1, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1,"sources":[{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"last","order":"desc"}}}]}}}}, requestedTotalSize=1, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml
new file mode 100644
index 00000000000..ea76cdee61e
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join1.yaml
@@ -0,0 +1,19 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalProject(c=[$0], state=[$1], r.c=[$2], r.state=[$3])
+        LogicalJoin(condition=[=($1, $3)], joinType=[inner])
+          LogicalProject(c=[$1], state=[$0])
+            LogicalAggregate(group=[{0}], c=[COUNT()])
+              LogicalProject(state=[$7])
+                CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+          LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT])
+            LogicalProject(c=[$1], state=[$0])
+              LogicalAggregate(group=[{0}], c=[COUNT()])
+                LogicalProject(state=[$9])
+                  CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableMergeJoin(condition=[=($1, $3)], joinType=[inner])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000, SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join2.yaml
new file mode 100644
index 00000000000..ca7ab3fc668
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join2.yaml
@@ -0,0 +1,21 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalProject(c=[$0], state=[$1], r.c=[$2], r.state=[$3])
+        LogicalJoin(condition=[=($1, $3)], joinType=[inner])
+          LogicalProject(c=[$1], state=[$0])
+            LogicalAggregate(group=[{0}], c=[COUNT()])
+              LogicalProject(state=[$7])
+                LogicalFilter(condition=[IS NOT NULL($7)])
+                  CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+          LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT])
+            LogicalProject(c=[$1], state=[$0])
+              LogicalAggregate(group=[{0}], c=[COUNT()])
+                LogicalProject(state=[$9])
+                  LogicalFilter(condition=[IS NOT NULL($9)])
+                    CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableMergeJoin(condition=[=($1, $3)], joinType=[inner])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000, SORT->[1]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml
new file mode 100644
index 00000000000..1326030ea7e
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join3.yaml
@@ -0,0 +1,19 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalProject(c=[$2], state=[$3])
+        LogicalJoin(condition=[=($1, $3)], joinType=[inner])
+          LogicalProject(c=[$1], state=[$0])
+            LogicalAggregate(group=[{0}], c=[COUNT()])
+              LogicalProject(state=[$7])
+                CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+          LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT])
+            LogicalProject(c=[$1], state=[$0])
+              LogicalAggregate(group=[{0}], c=[COUNT()])
+                LogicalProject(state=[$9])
+                  CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableHashJoin(condition=[=($1, $2)], joinType=[semi])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), SORT->[0]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml
new file mode 100644
index 00000000000..51d82f37450
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_paginating_join4.yaml
@@ -0,0 +1,20 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalProject(c=[$2], state=[$3])
+        LogicalJoin(condition=[=($1, $3)], joinType=[inner])
+          LogicalSort(fetch=[10])
+            LogicalProject(c=[$1], state=[$0])
+              LogicalAggregate(group=[{0}], c=[COUNT()])
+                LogicalProject(state=[$7])
+                  CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+          LogicalSystemLimit(fetch=[50000], type=[JOIN_SUBSEARCH_MAXOUT])
+            LogicalProject(c=[$1], state=[$0])
+              LogicalAggregate(group=[{0}], c=[COUNT()])
+                LogicalProject(state=[$7])
+                  CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableHashJoin(condition=[=($1, $2)], joinType=[semi])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT()), PROJECT->[c, state], LIMIT->50000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)])
+        CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0}), LIMIT->10], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":2,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"last","order":"asc"}}}]}}}}, requestedTotalSize=10, pageSize=null, startFrom=0)])
\ No newline at end of file
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml
index 03ae462f020..ad53f3b5972 100644
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_timestamp_push.yaml
@@ -7,4 +7,4 @@ calcite:
     LogicalProject(t=[UNIX_TIMESTAMP($3)])
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), t], SORT->[1 ASC FIRST], LIMIT->3, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":3,"sources":[{"t":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQB4XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJVTklYX1RJTUVTVEFNUCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiRE9VQkxFIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["birthdate"]}},"missing_bucket":true,"value_type":"double","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), t], SORT->[1 ASC FIRST], LIMIT->3, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":3,"sources":[{"t":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQB4XsKICAib3AiOiB7CiAgICAibmFtZSI6ICJVTklYX1RJTUVTVEFNUCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiRE9VQkxFIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["birthdate"]}},"missing_bucket":true,"value_type":"double","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=3, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml index 483b5d2dfa0..fb0beac704b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_script_udt_arg_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], 
_maxscore=[$16], _sort=[$17], _routing=[$18], t=[DATE_ADD($3, 1:INTERVAL DAY)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFmXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTUEFOIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJEQVRFX0FERCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAxCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAibnVsbGFibGUiOiB0cnVlLAogICAgInByZWNpc2lvbiI6IC0xCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["birthdate",1,"d"]}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(t,1d)], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(t,1d)":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFmXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJTUEFOIiwKICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJEQVRFX0FERCIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImxpdGVyYWwiOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVSVkFMX0RBWSIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogMTAsCiAgICAgICAgICAgICJzY2FsZSI6IDYKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInVkdCI6ICJFWFBSX1RJTUVTVEFNUCIsCiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAxCiAgICAgIH0KICAgIH0KICBdLAogICJjbGFzcyI6ICJvcmcub3BlbnNlYXJjaC5zcWwuZXhwcmVzc2lvbi5mdW5jdGlvbi5Vc2VyRGVmaW5lZEZ1bmN0aW9uQnVpbGRlciQxIiwKICAidHlwZSI6IHsKICAgICJ1ZHQiOiAiRVhQUl9USU1FU1RBTVAiLAogICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAibnVsbGFibGUiOiB0cnVlLAogICAgInByZWNpc2lvbiI6IC0xCiAgfSwKICAiZGV0ZXJtaW5pc3RpYyI6IHRydWUsCiAgImR5bmFtaWMiOiBmYWxzZQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["birthdate",1,"d"]}},"missing_bucket":false,"value_type":"long","order":"asc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_distinct_count_approx_enhancement.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_distinct_count_approx_enhancement.json index a8d07145562..ceeaa784c82 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_distinct_count_approx_enhancement.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_distinct_count_approx_enhancement.json @@ -1,6 +1,6 @@ { "calcite":{ "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(distinct_states=[$1], gender=[$0])\n LogicalAggregate(group=[{0}], distinct_states=[DISTINCT_COUNT_APPROX($1)])\n LogicalProject(gender=[$4], state=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},distinct_states=DISTINCT_COUNT_APPROX($1)), PROJECT->[distinct_states, gender], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"distinct_states\":{\"cardinality\":{\"field\":\"state.keyword\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical":"CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},distinct_states=DISTINCT_COUNT_APPROX($1)), PROJECT->[distinct_states, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"distinct_states\":{\"cardinality\":{\"field\":\"state.keyword\"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml index 0726597ef90..265e25dd417 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_script.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..1=[{inputs}], expr#2=[CHAR_LENGTH($t0)], sum=[$t1], len=[$t2], gender=[$t0]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum=SUM($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",100]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum=SUM($1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQBRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICIrIiwKICAgICJraW5kIjogIlBMVVMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",100]}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml index 8fd23589e33..88b1e828eb6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_with_sum_enhancement.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | EnumerableCalc(expr#0..3=[{inputs}], expr#4=[100], expr#5=[*($t2, $t4)], expr#6=[+($t1, $t5)], expr#7=[-($t1, $t5)], expr#8=[*($t1, $t4)], sum(balance)=[$t1], sum(balance + 100)=[$t6], sum(balance - 100)=[$t7], sum(balance * 100)=[$t8], sum(balance / 100)=[$t3], gender=[$t0]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum(balance)=SUM($1),sum(balance + 100)_COUNT=COUNT($1),sum(balance / 100)=SUM($2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"sum(balance + 100)_COUNT":{"value_count":{"field":"balance"}},"sum(balance / 100)":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCEXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJESVZJREUiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",100]}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},sum(balance)=SUM($1),sum(balance + 100)_COUNT=COUNT($1),sum(balance / 100)=SUM($2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"sum(balance)":{"sum":{"field":"balance"}},"sum(balance + 100)_COUNT":{"value_count":{"field":"balance"}},"sum(balance / 100)":{"sum":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCEXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJESVZJREUiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0sCiAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICJ0eXBlIjogewogICAgInR5cGUiOiAiQklHSU5UIiwKICAgICJudWxsYWJsZSI6IHRydWUKICB9LAogICJkZXRlcm1pbmlzdGljIjogdHJ1ZSwKICAiZHluYW1pYyI6IGZhbHNlCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["balance",100]}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json deleted file mode 100644 index 7a857107a13..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalUnion(all=[true])\n LogicalProject(cnt=[$1], gender=[$0])\n LogicalAggregate(group=[{0}], cnt=[COUNT($1)])\n LogicalProject(gender=[$4], balance=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(cnt=[$0], gender=[null:VARCHAR])\n LogicalAggregate(group=[{}], cnt=[COUNT()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), PROJECT->[cnt, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[null:VARCHAR], proj#0..1=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"track_total_hits\":2147483647},
requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" - } -} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.yaml new file mode 100644 index 00000000000..6180edc83f3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_append_command.yaml @@ -0,0 +1,17 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalUnion(all=[true]) + LogicalProject(cnt=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], cnt=[COUNT($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(cnt=[$0], gender=[null:VARCHAR]) + LogicalAggregate(group=[{}], cnt=[COUNT()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableUnion(all=[true]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), PROJECT->[cnt, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"cnt":{"value_count":{"field":"balance"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0=[{inputs}], expr#1=[null:VARCHAR], proj#0..1=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},cnt=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","track_total_hits":2147483647}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json index 6ec42972a10..1375ff21c2b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_appendpipe_command.json @@ -1,6 +1,6 @@ { "calcite": { "logical":"LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], cnt=[$19])\n LogicalUnion(all=[true])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12], _id=[$13], _index=[$14], _score=[$15], _maxscore=[$16], _sort=[$17], _routing=[$18], cnt=[null:BIGINT])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(account_number=[null:BIGINT], firstname=[null:VARCHAR], address=[null:VARCHAR], birthdate=[null:EXPR_TIMESTAMP VARCHAR], gender=[$0], city=[null:VARCHAR], lastname=[null:VARCHAR], balance=[null:BIGINT], employer=[null:VARCHAR], state=[null:VARCHAR], age=[null:INTEGER], email=[null:VARCHAR], male=[null:BOOLEAN], _id=[null:VARCHAR], 
_index=[null:VARCHAR], _score=[null:REAL], _maxscore=[null:REAL], _sort=[null:BIGINT], _routing=[null:VARCHAR], cnt=[$1])\n LogicalAggregate(group=[{0}], cnt=[COUNT($1)])\n LogicalProject(gender=[$4], balance=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical":"EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], proj#0..13=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=[null:EXPR_TIMESTAMP VARCHAR], expr#5=[null:INTEGER], expr#6=[null:BOOLEAN], account_number=[$t2], firstname=[$t3], address=[$t3], birthdate=[$t4], gender=[$t0], city=[$t3], lastname=[$t3], balance=[$t2], employer=[$t3], state=[$t3], age=[$t5], email=[$t3], male=[$t6], cnt=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical":"EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..12=[{inputs}], expr#13=[null:BIGINT], proj#0..13=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\",\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[null:BIGINT], expr#3=[null:VARCHAR], expr#4=[null:EXPR_TIMESTAMP VARCHAR], expr#5=[null:INTEGER], expr#6=[null:BOOLEAN], account_number=[$t2], firstname=[$t3], address=[$t3], birthdate=[$t4], gender=[$t0], city=[$t3], lastname=[$t3], balance=[$t2], employer=[$t3], state=[$t3], age=[$t5], email=[$t3], male=[$t6], cnt=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},cnt=COUNT($1)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"gender\":{\"terms\":{\"field\":\"gender.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml index 1b4c379f9b6..61017e4f76b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($4)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml index 6d5c76b1443..daca12143df 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml index af72f304286..b5d9a2194dc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml index a2385ccdbb2..3dc325d896b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + 
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml index c16dde54e22..4084a34e2d4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_keepempty_false_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml index c16dde54e22..4084a34e2d4 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_push.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(account_number=[$0], gender=[$4], age=[$8]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest.yaml index 3dbd4ff8c20..3938446b735 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(server=[$1], message=[$3], @timestamp=[$2]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},earliest_message=ARG_MIN($1, $2),latest_message=ARG_MAX($1, $2)), PROJECT->[earliest_message, latest_message, server], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"server":{"terms":{"field":"server","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"earliest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"@timestamp":{"order":"asc"}}]}},"latest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"@timestamp":{"order":"desc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},earliest_message=ARG_MIN($1, $2),latest_message=ARG_MAX($1, $2)), PROJECT->[earliest_message, latest_message, server], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"server":{"terms":{"field":"server","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"earliest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"@timestamp":{"order":"asc"}}]}},"latest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"@timestamp":{"order":"desc"}}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest_custom_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest_custom_time.yaml index 4dc24423d9d..d38a91f0922 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_earliest_latest_custom_time.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(level=[$4], message=[$3], created_at=[$0]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},earliest_message=ARG_MIN($1, $2),latest_message=ARG_MAX($1, $2)), PROJECT->[earliest_message, latest_message, level], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"level":{"terms":{"field":"level","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"earliest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"created_at":{"order":"asc"}}]}},"latest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"created_at":{"order":"desc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},earliest_message=ARG_MIN($1, 
$2),latest_message=ARG_MAX($1, $2)), PROJECT->[earliest_message, latest_message, level], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"level":{"terms":{"field":"level","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"earliest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"created_at":{"order":"asc"}}]}},"latest_message":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"message.keyword"}],"sort":[{"created_at":{"order":"desc"}}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.json deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.yaml index c4f023585c2..f1b2c6a434f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_agg_push.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[>($8, 30)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state, city], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["city","state","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), PROJECT->[avg_age, state, city], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["city","state","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml
b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml index dd1f6444d09..9ac2557ab2e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR)]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_first_last.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_first_last.yaml index 30f505a24a7..e339a485677 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_first_last.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_first_last.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(gender=[$4], firstname=[$1]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},first_name=FIRST($1),last_name=LAST($1)), PROJECT->[first_name, last_name, gender], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"first_name":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"firstname"}]}},"last_name":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"firstname"}],"sort":[{"_doc":{"order":"desc"}}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},first_name=FIRST($1),last_name=LAST($1)), PROJECT->[first_name, last_name, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"first_name":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"firstname"}]}},"last_name":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"firstname"}],"sort":[{"_doc":{"order":"desc"}}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown.json index 5fbc3abecf1..82c72695d0b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], state=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(state=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown2.yaml index 3a19f39e7da..9a3b2cfe48c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown2.yaml @@ -7,4 +7,4 @@ calcite: LogicalProject(state=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], LIMIT->100, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":100,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], LIMIT->100, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":100,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=100, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown3.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown3.json index 986459fd2f4..0f391511f22 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown3.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown3.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalSort(offset=[10], fetch=[10])\n LogicalSort(fetch=[100])\n LogicalProject(count()=[$1], state=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(state=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], state=[$t0])\n EnumerableLimit(offset=[10], fetch=[10])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->100, LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":20,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], state=[$t0])\n EnumerableLimit(offset=[10], fetch=[10])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->100, 
LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":20,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]}}}}, requestedTotalSize=20, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown4.yaml index d13ed6aa8d3..4df04144f06 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown4.yaml @@ -10,4 +10,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT->[1 ASC FIRST], LIMIT->100, LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":20,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT->[1 ASC FIRST], LIMIT->100, LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":20,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=20, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable1.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable1.yaml index b4117a5a84c..4ff9804f458 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable1.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable1.yaml @@ -12,4 +12,4 @@ calcite: EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], state=[$t0]) EnumerableLimit(offset=[10], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->100, LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":20,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), LIMIT->100, LIMIT->[10 from 10]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":20,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=20, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.yaml index 46e8cab736b..e4f66cf442d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_limit_agg_pushdown_bucket_nullable2.yaml @@ -11,4 +11,4 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableLimit(offset=[10], fetch=[10]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT->[1 ASC FIRST], LIMIT->100, LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":20,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), state], SORT->[1 ASC FIRST], LIMIT->100, LIMIT->[10 from 10]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":20,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]}}}}, requestedTotalSize=20, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml index 195971e53bc..30ad4020d5d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_output.yaml @@ -18,4 +18,4 @@ calcite: EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], proj#0..1=[{exprs}], $condition=[$t3]) EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) EnumerableCalc(expr#0=[{inputs}], expr#1=[2], expr#2=[+($t0, $t1)], expr#3=[IS NOT NULL($t0)], age2=[$t2], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), SORT->[0 ASC FIRST], PROJECT->[avg_age]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["city","state","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_age=AVG($2)), SORT->[0 ASC FIRST], PROJECT->[avg_age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["city","state","age"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"city":{"terms":{"field":"city.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml index dfcb6e6b7e0..cc07aa4bfc4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], patterns_field=[$t6], pattern_count=[$t1], tokens=[$t9], sample_logs=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQG6HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiU0VBUkNIIiwKICAgICAgICAia2luZCI6ICJTRUFSQ0giLAogICAgICAgICJzeW50YXgiOiAiSU5URVJOQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJzaW5nbGV0b24iLAogICAgICAgICAgICAgICAgIntcInZhbHVlXCI6XCJcIixcImNoYXJzZXROYW1lXCI6XCJJU08tODg1OS0xXCIsXCJjb2xsYXRpb25cIjp7XCJjb2xsYXRpb25OYW1lXCI6XCJJU08tODg1OS0xJGVuX1VTJHByaW1hcnlcIixcImNvZXJjaWJpbGl0eVwiOlwiSU1QTElDSVRcIixcImxvY2FsZVwiOlwiZW5fVVNcIn0sXCJjaGFyc2V0XCI6XCJJU08tODg1OS0xXCIsXCJ2YWx1ZUJ5dGVzXCI6bnVsbH0iCiAgICAgICAgICAgICAgXQogICAgICAgICAgICBdLAogICAgICAgICAgICAibnVsbEFzIjogIlRSVUUiCiAgICAgICAgICB9LAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,2,0,2,2],"DIGESTS":["email.keyword","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQG6HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiU0VBUkNIIiwKICAgICAgICAia2luZCI6ICJTRUFSQ0giLAogICAgICAgICJzeW50YXgiOiAiSU5URVJOQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJzaW5nbGV0b24iLAogICAgICAgICAgICAgICAgIntcInZhbHVlXCI6XCJcIixcImNoYXJzZXROYW1lXCI6XCJJU08tODg1OS0xXCIsXCJjb2xsYXRpb25cIjp7XCJjb2xsYXRpb25OYW1lXCI6XCJJU08tODg1OS0xJGVuX1VTJHByaW1hcnlcIixcImNvZXJjaWJpbGl0eVwiOlwiSU1QTElDSVRcIixcImxvY2FsZVwiOlwiZW5fVVNcIn0sXCJjaGFyc2V0XCI6XCJJU08tODg1OS0xXCIsXCJ2YWx1ZUJ5dGVzXCI6bnVsbH0iCiAgICAgICAgICAgICAgXQogICAgICAgICAgICBdLAogICAgICAgICAgICAibnVsbEFzIjogIlRSVUUiCiAgICAgICAgICB9LAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,2],"DIGESTS":["email.keyword","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"email.keyword"}]}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQG6HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiU0VBUkNIIiwKICAgICAgICAia2luZCI6ICJTRUFSQ0giLAogICAgICAgICJzeW50YXgiOiAiSU5URVJOQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJzaW5nbGV0b24iLAogICAgICAgICAgICAgICAgIntcInZhbHVlXCI6XCJcIixcImNoYXJzZXROYW1lXCI6XCJJU08tODg1OS0xXCIsXCJjb2xsYXRpb25cIjp7XCJjb2xsYXRpb25OYW1lXCI6XCJJU08tODg1OS0xJGVuX1VTJHByaW1hcnlcIixcImNvZXJjaWJpbGl0eVwiOlwiSU1QTElDSVRcIixcImxvY2FsZVwiOlwiZW5fVVNcIn0sXCJjaGFyc2V0XCI6XCJJU08tODg1OS0xXCIsXCJ2YWx1ZUJ5dGVzXCI6bnVsbH0iCiAgICAgICAgICAgICAgXQogICAgICAgICAgICBdLAogICAgICAgICAgICAibnVsbEFzIjogIlRSVUUiCiAgICAgICAgICB9LAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,2,0,2,2],"DIGESTS":["email.keyword","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQG6HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiU0VBUkNIIiwKICAgICAgICAia2luZCI6ICJTRUFSQ0giLAogICAgICAgICJzeW50YXgiOiAiSU5URVJOQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJzaW5nbGV0b24iLAogICAgICAgICAgICAgICAgIntcInZhbHVlXCI6XCJcIixcImNoYXJzZXROYW1lXCI6XCJJU08tODg1OS0xXCIsXCJjb2xsYXRpb25cIjp7XCJjb2xsYXRpb25OYW1lXCI6XCJJU08tODg1OS0xJGVuX1VTJHByaW1hcnlcIixcImNvZXJjaWJpbGl0eVwiOlwiSU1QTElDSVRcIixcImxvY2FsZVwiOlwiZW5fVVNcIn0sXCJjaGFyc2V0XCI6XCJJU08tODg1OS0xXCIsXCJ2YWx1ZUJ5dGVzXCI6bnVsbH0iCiAgICAgICAgICAgICAgXQogICAgICAgICAgICBdLAogICAgICAgICAgICAibnVsbEFzIjogIlRSVUUiCiAgICAgICAgICB9LAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICB9CiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMywKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0,2,2],"DIGESTS":["email.keyword","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"fields":[{"field":"email.keyword"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml index a5f43d257ce..90e144cad8f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_script_push_on_text.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[>(CHAR_LENGTH($2), 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address], SCRIPT->>(CHAR_LENGTH($0), 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), address_length], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCIXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI+IiwKICAgICJraW5kIjogIkdSRUFURVJfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1,2],"DIGESTS":["address",0]}},"boost":1.0}},"_source":{"includes":["address"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"address_length":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1],"DIGESTS":["address"]}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[address], SCRIPT->>(CHAR_LENGTH($0), 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), address_length], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCIXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI+IiwKICAgICJraW5kIjogIkdSRUFURVJfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[1,2],"DIGESTS":["address",0]}},"boost":1.0}},"_source":{"includes":["address"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"address_length":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[1],"DIGESTS":["address"]}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_agg_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_agg_push.json index 5b355b16b0e..75fea09dc13 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_sort_then_agg_push.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(avg(balance)=[$1], state=[$0])\n LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)])\n LogicalProject(state=[$7], balance=[$3])\n LogicalSort(sort0=[$3], sort1=[$8], dir0=[ASC-nulls-first], dir1=[ASC-nulls-first])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, state, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), PROJECT->[avg(balance), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"balance\",\"state\",\"age\"],\"excludes\":[]},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg(balance)\":{\"avg\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, state, age], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), PROJECT->[avg(balance), state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"balance\",\"state\",\"age\"],\"excludes\":[]},\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg(balance)\":{\"avg\":{\"field\":\"balance\"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml index 14cf8e6db82..b4577ff19a4 100644 
--- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(environment=[$0], status_code=[$2], service=[$3], host=[$4], memory_usage=[$5], response_time=[$6], cpu_usage=[$7], region=[$8], bytes_sent=[$9], _id=[$10], _index=[$11], _score=[$12], _maxscore=[$13], _sort=[$14], _routing=[$15], @timestamp=[WIDTH_BUCKET($1, 3, -(MAX($1) OVER (), MIN($1) OVER ()), MAX($1) OVER ())]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml index 09e016d8e9e..02f72c2be02 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(environment=[$0], status_code=[$2], service=[$3], host=[$4], memory_usage=[$5], response_time=[$6], cpu_usage=[$7], region=[$8], bytes_sent=[$9], _id=[$10], _index=[$11], _score=[$12], _maxscore=[$13], _sort=[$14], _routing=[$15], @timestamp=[WIDTH_BUCKET($1, 3, -(MAX($1) OVER (), MIN($1) OVER ()), MAX($1) OVER ())]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(cpu_usage)=AVG($2)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(cpu_usage)=AVG($2)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span.json index 01258e3fd0b..094516a934a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(age,10)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(age,10)=[SPAN($10, 10, null:NULL)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(age,10)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"interval\":10.0}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(age,10)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\",\"interval\":10.0}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json index 977fe96090c..6104070d6d4 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_span_non_bucket_nullable.json @@ -1,6 +1,6 @@ { "calcite": { "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(count()=[$1], span(age,10)=[$0])\n LogicalAggregate(group=[{0}], count()=[COUNT()])\n LogicalProject(span(age,10)=[SPAN($10, 10, null:NULL)])\n LogicalFilter(condition=[IS NOT NULL($10)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(age,10)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"missing_bucket\":false,\"order\":\"asc\",\"interval\":10.0}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(age,10)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(age,10)\":{\"histogram\":{\"field\":\"age\",\"missing_bucket\":false,\"order\":\"asc\",\"interval\":10.0}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml index b4384528c0c..0e1e29e9370 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1m)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1m)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1m)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1m"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml index 5021adf62b8..c9e64c16e80 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_by_timespan2.yaml @@ -7,4 +7,4 @@ calcite: LogicalFilter(condition=[IS NOT NULL($3)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1M"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1M)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1M)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","calendar_interval":"1M"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml b/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml index 71a1e22775c..0b6874a6499 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/udf_geoip_in_agg_pushed.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(info.city=[ITEM(GEOIP('my-datasource':VARCHAR, $0), 'city')]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), info.city], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"info.city":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQErXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJVEVNIiwKICAgICJraW5kIjogIklURU0iLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiR0VPSVAiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ1ZHQiOiAiRVhQUl9JUCIsCiAgICAgICAgICAgICJ0eXBlIjogIk9USEVSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJNQVAiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJrZXkiOiB7CiAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgfSwKICAgICAgICAidmFsdWUiOiB7CiAgICAgICAgICAidHlwZSI6ICJBTlkiLAogICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICAgICAic2NhbGUiOiAtMjE0NzQ4MzY0OAogICAgICAgIH0KICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDQKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0,2],"DIGESTS":["my-datasource","host","city"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_weblogs]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), info.city], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"info.city":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQErXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJJVEVNIiwKICAgICJraW5kIjogIklURU0iLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiR0VPSVAiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ1ZHQiOiAiRVhQUl9JUCIsCiAgICAgICAgICAgICJ0eXBlIjogIk9USEVSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgImNsYXNzIjogIm9yZy5vcGVuc2VhcmNoLnNxbC5leHByZXNzaW9uLmZ1bmN0aW9uLlVzZXJEZWZpbmVkRnVuY3Rpb25CdWlsZGVyJDEiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJNQVAiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJrZXkiOiB7CiAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgfSwKICAgICAgICAidmFsdWUiOiB7CiAgICAgICAgICAidHlwZSI6ICJBTlkiLAogICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICAgICAic2NhbGUiOiAtMjE0NzQ4MzY0OAogICAgICAgIH0KICAgICAgfSwKICAgICAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICAgICAiZHluYW1pYyI6IGZhbHNlCiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDQKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[2,0,2],"DIGESTS":["my-datasource","host","city"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.json deleted file mode 100644 index 5665e159573..00000000000 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalUnion(all=[true])\n LogicalProject(cnt=[$1], gender=[$0])\n LogicalAggregate(group=[{0}], cnt=[COUNT($1)])\n LogicalProject(gender=[$4], balance=[$7])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n LogicalProject(cnt=[$0], gender=[null:VARCHAR])\n LogicalAggregate(group=[{}], cnt=[COUNT()])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableUnion(all=[true])\n EnumerableCalc(expr#0..1=[{inputs}], cnt=[$t1], gender=[$t0])\n EnumerableAggregate(group=[{4}], cnt=[COUNT($7)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[null:VARCHAR], proj#0..1=[{exprs}])\n EnumerableAggregate(group=[{}], cnt=[COUNT()])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])\n" - } -} \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.yaml new file mode 100644 index 00000000000..eb4b6f09d58 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_append_command.yaml @@ -0,0 +1,20 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalUnion(all=[true]) + LogicalProject(cnt=[$1], gender=[$0]) + LogicalAggregate(group=[{0}], cnt=[COUNT($1)]) + LogicalProject(gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + LogicalProject(cnt=[$0], gender=[null:VARCHAR]) + LogicalAggregate(group=[{}], cnt=[COUNT()]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableUnion(all=[true]) + EnumerableCalc(expr#0..1=[{inputs}], cnt=[$t1], gender=[$t0]) + EnumerableAggregate(group=[{4}], cnt=[COUNT($7)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + EnumerableCalc(expr#0=[{inputs}], expr#1=[null:VARCHAR], proj#0..1=[{exprs}]) + EnumerableAggregate(group=[{}], cnt=[COUNT()]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml index e57913c2f62..c2b045b4600 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp.yaml @@ -11,6 +11,5 @@ root: ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ :[]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml index b0a8deed278..0e6f5549a56 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_can_match_shortcut.yaml @@ -15,6 +15,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml index b0a8deed278..0e6f5549a56 100644 --- 
a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_timestamp_no_can_match_shortcut.yaml @@ -15,6 +15,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml index e57913c2f62..c2b045b4600 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/asc_sort_with_after_timestamp.yaml @@ -11,6 +11,5 @@ root: ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ :[]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high.yaml index ffe939e5a52..ec60aa78ed7 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":0,\"timeout\":\"1m\",\"aggregations\":{\"dc(`agent.name`)\":{\"cardinality\"\ - :{\"field\":\"agent.name\"}}}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :{\"field\":\"agent.name\"}}}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high_2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high_2.yaml index 0c147949642..996fc82b1b7 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high_2.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_high_2.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":0,\"timeout\":\"1m\",\"aggregations\":{\"dc(`event.id`)\":{\"cardinality\"\ - :{\"field\":\"event.id\"}}}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :{\"field\":\"event.id\"}}}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_low.yaml 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_low.yaml index f064201008e..90117e88947 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_low.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/cardinality_agg_low.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"from\":0,\"\ size\":0,\"timeout\":\"1m\",\"aggregations\":{\"dc(`cloud.region`)\":{\"\ - cardinality\":{\"field\":\"cloud.region\"}}}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + cardinality\":{\"field\":\"cloud.region\"}}}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml index 073078ddf0d..c0f27278561 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_date_histogram_daily.yaml @@ -20,6 +20,6 @@ root: sources\":[{\"span(`@timestamp`,1d)\":{\"date_histogram\":{\"field\"\ :\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"fixed_interval\"\ :\"1d\"}}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\"\ - :\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + :\"_index\"}}}}}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml index 6a0e0c660da..a032981995e 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms.yaml @@ -21,7 +21,6 @@ root: ,\"missing_bucket\":true,\"missing_order\":\"last\",\"order\":\"desc\"\ }}},{\"cloud.region\":{\"terms\":{\"field\":\"cloud.region\",\"missing_bucket\"\ :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ - :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml index 5c77f33d0cd..43017baa00f 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/composite_terms_keyword.yaml @@ -23,7 +23,6 @@ root: :true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"aws.cloudwatch.log_stream\"\ :{\"terms\":{\"field\":\"aws.cloudwatch.log_stream\",\"missing_bucket\"\ :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ - :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + 
:{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml index 72549142297..f294eb043f3 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_hourly_agg.yaml @@ -10,6 +10,6 @@ root: composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1h)\":{\"date_histogram\"\ :{\"field\":\"@timestamp\",\"missing_bucket\":false,\"order\":\"asc\",\"\ fixed_interval\":\"1h\"}}}]},\"aggregations\":{\"count()\":{\"value_count\"\ - :{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :{\"field\":\"_index\"}}}}}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml index be30d2a0801..32e0cdae956 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/date_histogram_minute_agg.yaml @@ -14,6 +14,6 @@ root: :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(`@timestamp`,1m)\"\ :{\"date_histogram\":{\"field\":\"@timestamp\",\"missing_bucket\":false,\"\ order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ - :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml index 23ca821adf6..28d71ecf09e 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/default.yaml @@ -10,6 +10,5 @@ root: size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml index ed13e6905cb..16f1fcfa33a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp.yaml @@ -11,6 +11,5 @@ root: ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ 
:[]},\"sort\":[{\"@timestamp\":{\"order\":\"desc\",\"missing\":\"_last\"\ - }}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml index e2fc446cdd7..8f8424df6b7 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_can_match_shortcut.yaml @@ -15,6 +15,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - desc\",\"missing\":\"_last\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml index e2fc446cdd7..8f8424df6b7 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_timestamp_no_can_match_shortcut.yaml @@ -15,6 +15,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\":\"\ - desc\",\"missing\":\"_last\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml index ed13e6905cb..16f1fcfa33a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/desc_sort_with_after_timestamp.yaml @@ -11,6 +11,5 @@ root: ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ :[]},\"sort\":[{\"@timestamp\":{\"order\":\"desc\",\"missing\":\"_last\"\ - }}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml index 2a85a0971ce..1ec8b25e62b 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_in_range.yaml @@ -20,6 +20,5 @@ root: :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"agent\",\"process\",\"\ log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms.yaml index 3031899608b..1fd8d271516 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms.yaml @@ -19,7 +19,6 @@ root: :{\"composite\":{\"size\":1000,\"sources\":[{\"aws.cloudwatch.log_stream\"\ :{\"terms\":{\"field\":\"aws.cloudwatch.log_stream\",\"missing_bucket\"\ :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ - :{\"station\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + :{\"station\":{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms_low_cardinality.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms_low_cardinality.yaml index 2a05fec4f3e..4fd71c39b96 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms_low_cardinality.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/keyword_terms_low_cardinality.yaml @@ -19,7 +19,6 @@ root: :{\"composite\":{\"size\":1000,\"sources\":[{\"aws.cloudwatch.log_stream\"\ :{\"terms\":{\"field\":\"aws.cloudwatch.log_stream\",\"missing_bucket\"\ :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ - :{\"country\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + :{\"country\":{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/multi_terms_keyword.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/multi_terms_keyword.yaml index 80709a3e0f5..2954c1a0fa7 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/multi_terms_keyword.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/multi_terms_keyword.yaml @@ -25,7 +25,6 @@ root: ,\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"\ }}},{\"cloud.region\":{\"terms\":{\"field\":\"cloud.region\",\"missing_bucket\"\ :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ - :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git 
a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml index 20f024800e5..aa99c0c8636 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message.yaml @@ -15,6 +15,6 @@ root: :true,\"fuzzy_transpositions\":true,\"boost\":1.0}},\"_source\":{\"includes\"\ :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ - aws\",\"event\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + aws\",\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml index fdd6d08721b..069db10a79d 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered.yaml @@ -21,6 +21,5 @@ root: :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"agent\",\"process\",\"\ log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml index 4cc79a8db95..e3ba107ee04 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/query_string_on_message_filtered_sorted_num.yaml @@ -22,6 +22,5 @@ root: log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ :[]},\"sort\":[{\"@timestamp\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml index f9d406b2906..4406c961892 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range.yaml @@ -14,6 +14,6 @@ root: boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\"\ :{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ - ,\"aws\",\"event\"],\"excludes\":[]}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, 
searchResponse=null)" + ,\"aws\",\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_1.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_1.yaml index 3f99d51799a..40df7fa28c9 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_1.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_1.yaml @@ -22,7 +22,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"\ - from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false,\ - \ pitId=*,\ + from\":0,\"size\":10000,\"timeout\":\"1m\"}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_2.yaml index 400198d6635..fba6186ffd3 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_2.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_agg_2.yaml @@ -20,7 +20,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"\ - from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false,\ - \ pitId=*,\ + from\":0,\"size\":10000,\"timeout\":\"1m\"}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo.yaml index 159c9be49a1..b1717e04211 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo.yaml @@ -32,7 +32,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"\ - from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*,\ + from\":0,\"size\":10000,\"timeout\":\"1m\"}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo_with_metrics.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo_with_metrics.yaml index 4f3004af106..b146d7e284c 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo_with_metrics.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_auto_date_histo_with_metrics.yaml @@ -30,7 +30,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=big5, sourceBuilder={\"\ - from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*,\ + from\":0,\"size\":10000,\"timeout\":\"1m\"}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml index 2663492036c..44ffe43c5c7 100644 --- 
a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_big_range_big_term_query.yaml @@ -16,6 +16,6 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ @timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml index 3365a5b0813..c17a49a4c2e 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_big_term_query.yaml @@ -14,6 +14,5 @@ root: :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"agent\",\"process\",\"\ log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\",\"\ data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml index 664d6428c7b..d04271ee003 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_conjunction_small_range_small_term_query.yaml @@ -16,6 +16,6 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ @timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml index 641befc2867..c0d63da12bf 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_field_disjunction_big_range_small_term_query.yaml @@ -16,6 +16,6 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ 
@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml index 156f9ced9fe..5d49015ec62 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_numeric.yaml @@ -14,6 +14,6 @@ root: adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"\ agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"\ @timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\"\ - ,\"event\"],\"excludes\":[]}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + ,\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml index 05a16cc76cf..0681c881ce9 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_asc_sort.yaml @@ -15,6 +15,6 @@ root: :{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ ,\"aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\"\ - :\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :\"asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml index e7322cb282e..1b9d3f2c246 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/range_with_desc_sort.yaml @@ -15,6 +15,6 @@ root: :{\"includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ ,\"aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"@timestamp\":{\"order\"\ - :\"desc\",\"missing\":\"_last\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :\"desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml index 23ca821adf6..28d71ecf09e 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/scroll.yaml @@ -10,6 +10,5 @@ root: 
size\":10,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"agent\",\"process\"\ ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ - :[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml index 20b18df0256..926319755f8 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_can_match_shortcut.yaml @@ -15,6 +15,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"meta.file\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml index 20b18df0256..926319755f8 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_keyword_no_can_match_shortcut.yaml @@ -15,6 +15,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"meta.file\":{\"order\":\"\ - asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml index 9d0f5c0ab0c..10228036927 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc.yaml @@ -11,6 +11,5 @@ root: ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ :[]},\"sort\":[{\"metrics.size\":{\"order\":\"asc\",\"missing\":\"_first\"\ - }}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml index 3718496568c..1925583a9e0 100644 --- 
a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_asc_with_match.yaml @@ -16,6 +16,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"metrics.size\":{\"order\"\ - :\"asc\",\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :\"asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml index 27126b931f0..acf47c582d9 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc.yaml @@ -11,6 +11,5 @@ root: ,\"log\",\"message\",\"tags\",\"cloud\",\"input\",\"@timestamp\",\"ecs\"\ ,\"data_stream\",\"meta\",\"host\",\"metrics\",\"aws\",\"event\"],\"excludes\"\ :[]},\"sort\":[{\"metrics.size\":{\"order\":\"desc\",\"missing\":\"_last\"\ - }}]}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}]}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml index a146d0531d5..00b13e06628 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/sort_numeric_desc_with_match.yaml @@ -16,6 +16,6 @@ root: :[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\",\"input\"\ ,\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\",\"\ aws\",\"event\"],\"excludes\":[]},\"sort\":[{\"metrics.size\":{\"order\"\ - :\"desc\",\"missing\":\"_last\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :\"desc\",\"missing\":\"_last\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml index ea9331ffa08..56f97a7481e 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/term.yaml @@ -11,6 +11,6 @@ root: value\":\"/var/log/messages/birdknight\",\"boost\":1.0}}},\"_source\":{\"\ includes\":[\"agent\",\"process\",\"log\",\"message\",\"tags\",\"cloud\"\ ,\"input\",\"@timestamp\",\"ecs\",\"data_stream\",\"meta\",\"host\",\"metrics\"\ - ,\"aws\",\"event\"],\"excludes\":[]}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + ,\"aws\",\"event\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_1.yaml 
b/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_1.yaml index 09bd047cfb9..a3da18d850c 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_1.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_1.yaml @@ -21,7 +21,6 @@ root: ,\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"\ }}},{\"process.name\":{\"terms\":{\"field\":\"process.name\",\"missing_bucket\"\ :true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ - :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + :{\"count()\":{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_2.yaml index 908ab96f04b..c15fd3db7b0 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_2.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/big5/terms_significant_2.yaml @@ -22,6 +22,5 @@ root: }}},{\"aws.cloudwatch.log_stream\":{\"terms\":{\"field\":\"aws.cloudwatch.log_stream\"\ ,\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"\ }}}]},\"aggregations\":{\"count()\":{\"value_count\":{\"field\":\"_index\"\ - }}}}}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + }}}}}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.yaml index 454ce35f3a8..73249d4d9ff 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.yaml @@ -13,6 +13,6 @@ root: ,\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"\ city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"\ missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\"\ - :{\"avg\":{\"field\":\"age\"}}}}}}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :{\"avg\":{\"field\":\"age\"}}}}}}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml index 289c6297e6f..5bf06c10265 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_date_string.yaml @@ -16,7 +16,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_date_formats,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ - _source\":{\"includes\":[\"yyyy-MM-dd\"],\"excludes\":[]}}, needClean=true,\ - \ searchDone=false, pitId=*,\ + _source\":{\"includes\":[\"yyyy-MM-dd\"],\"excludes\":[]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, 
searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml index c4b8521567a..0a06e142431 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_time_string.yaml @@ -16,7 +16,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_date_formats,\ \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"\ - _source\":{\"includes\":[\"custom_time\"],\"excludes\":[]}}, needClean=true,\ - \ searchDone=false, pitId=*,\ + _source\":{\"includes\":[\"custom_time\"],\"excludes\":[]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml index 5eb98d3ca33..778fdeaa1c7 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push_compare_timestamp_string.yaml @@ -14,7 +14,6 @@ root: include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\"\ :true,\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\"\ ,\"address\",\"birthdate\",\"gender\",\"city\",\"lastname\",\"balance\"\ - ,\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, needClean=true,\ - \ searchDone=false, pitId=*,\ + ,\"employer\",\"state\",\"age\",\"email\",\"male\"],\"excludes\":[]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml index f0f18391b93..66c7729f993 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_then_limit_push.yaml @@ -9,6 +9,5 @@ root: \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"query\":{\"\ range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\"\ :true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\"\ - :[]}}, needClean=true, searchDone=false, pitId=*, cursorKeepAlive=null,\ - \ searchAfter=null, searchResponse=null)" + :[]}}, pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml index 5b950289224..ff157e036d0 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_keyword_like_function_case_insensitive.yaml @@ -11,7 +11,6 @@ root: :{\"wildcard\":{\"firstname.keyword\":{\"wildcard\":\"*mbe*\",\"case_insensitive\"\ :true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"account_number\",\"\ firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"\ - 
state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, needClean=true,\ - \ searchDone=false, pitId=*,\ + state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml index 604ec122fdf..7ce50a2b37c 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_5_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_filter_5_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_filter_5_push.yaml index d1ba3213130..28b63f58511 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_filter_5_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10_filter_5_push.yaml @@ -15,7 +15,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ - \ sourceBuilder={\"from\":0,\"size\":10,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + \ sourceBuilder={\"from\":0,\"size\":10,\"timeout\":\"1m\"}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml index c6fbc4fd672..53ed94585f0 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_10from1_10from2_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":3,\"size\":8,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml index 604ec122fdf..7ce50a2b37c 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_5_10_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false,\ - \ 
pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml index 17960c2d39b..1d4cd474476 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_offsets_push.yaml @@ -7,6 +7,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":3,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ - includes\":[\"age\"],\"excludes\":[]}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + includes\":[\"age\"],\"excludes\":[]}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_push.yaml index 2d14d36d922..d2be3d3d5d2 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_push.yaml @@ -11,7 +11,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ - \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_filter_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_filter_push.yaml index f0f312cc4cd..221062310b3 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_filter_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_filter_push.yaml @@ -10,7 +10,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ - \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml index a49d48e5aa4..f2daa15d506 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_then_sort_push.yaml @@ -8,6 +8,6 @@ root: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\"\ - ,\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, 
searchResponse=null)" + ,\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_output.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_output.yaml index 51065230f90..db9b43af5bf 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_output.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_output.yaml @@ -30,7 +30,6 @@ root: :true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\"\ :{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"\ missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\"\ - :{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + :{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml index ec791b34e14..517435c9a92 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern.yaml @@ -10,7 +10,6 @@ root: \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"_source\"\ :{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\"\ ,\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"\ - ,\"patterns_field\"],\"excludes\":[]}}, needClean=true, searchDone=false,\ - \ pitId=*,\ + ,\"patterns_field\"],\"excludes\":[]}}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml index 15f29f544ae..ea56b054bb2 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_patterns_simple_pattern_agg_push.yaml @@ -14,7 +14,6 @@ root: missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"pattern_count\"\ :{\"value_count\":{\"field\":\"_index\"}},\"sample_logs\":{\"top_hits\"\ :{\"from\":0,\"size\":10,\"version\":false,\"seq_no_primary_term\":false,\"\ - explain\":false,\"fields\":[{\"field\":\"email\"}]}}}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + explain\":false,\"fields\":[{\"field\":\"email\"}]}}}}}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml index a49d48e5aa4..f2daa15d506 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_count_push.yaml @@ -8,6 +8,6 @@ root: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ 
includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\"\ - ,\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + ,\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml index a49d48e5aa4..f2daa15d506 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_then_limit_push.yaml @@ -8,6 +8,6 @@ root: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"\ includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\"\ - ,\"missing\":\"_first\"}}]}, needClean=true, searchDone=false, pitId=*,\ - \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + ,\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null, searchAfter=null,\ + \ searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml index 3fd26cb19a6..1277837696d 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan.yaml @@ -10,6 +10,6 @@ root: :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1m)\"\ :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ order\":\"asc\",\"fixed_interval\":\"1m\"}}}]},\"aggregations\":{\"count()\"\ - :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml index 167328625f1..017c998a042 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_stats_by_timespan2.yaml @@ -10,6 +10,6 @@ root: :{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"span(birthdate,1M)\"\ :{\"date_histogram\":{\"field\":\"birthdate\",\"missing_bucket\":false,\"\ order\":\"asc\",\"calendar_interval\":\"1M\"}}}]},\"aggregations\":{\"count()\"\ - :{\"value_count\":{\"field\":\"_index\"}}}}}}, needClean=true, searchDone=false,\ - \ pitId=*, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + :{\"value_count\":{\"field\":\"_index\"}}}}}}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml index 7e4de15c4d4..4497e8e6d63 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_take.yaml @@ -8,7 
+8,6 @@ root: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\"\ :{\"take\":{\"top_hits\":{\"from\":0,\"size\":2,\"version\":false,\"seq_no_primary_term\"\ - :false,\"explain\":false,\"fields\":[{\"field\":\"firstname\"}]}}}}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + :false,\"explain\":false,\"fields\":[{\"field\":\"firstname\"}]}}}}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml index 2433ee66003..aff2f01857c 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function.yaml @@ -11,7 +11,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ - \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml index 3187fd69900..2130b25c0e5 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_text_like_function_case_insensitive.yaml @@ -11,7 +11,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ - \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*,\ + \ sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, pitId=*,\ \ cursorKeepAlive=1m, searchAfter=null, searchResponse=null)" children: [] \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_push.yaml index 6a4ccc5b633..e6157d8ed2a 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_push.yaml @@ -14,7 +14,6 @@ root: - name: OpenSearchIndexScan description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ - \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, pitId=*,\ + \ cursorKeepAlive=null, searchAfter=null, searchResponse=null)" children: [] diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_sort_push.yaml index 47366bfd683..e246a328965 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_sort_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_sort_push.yaml @@ -15,7 +15,6 @@ root: description: request: "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account,\ \ sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"sort\":[{\"\ - age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean=true,\ - \ searchDone=false, pitId=*, cursorKeepAlive=null, searchAfter=null,\ - \ searchResponse=null)" + age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, pitId=*, cursorKeepAlive=null,\ + \ searchAfter=null, searchResponse=null)" children: [] diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index b52d4a06820..620c9b97cf7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -136,7 +136,7 @@ public static class AggregateBuilderHelper { final Map fieldTypes; final RelOptCluster cluster; final boolean bucketNullable; - final int bucketSize; + final int queryBucketSize; > T build(RexNode node, T aggBuilder) { return build(node, aggBuilder::field, aggBuilder::script); @@ -264,7 +264,8 @@ public static Pair, OpenSearchAggregationResponseParser + " aggregation"); } AggregationBuilder compositeBuilder = - AggregationBuilders.composite("composite_buckets", buckets).size(helper.bucketSize); + AggregationBuilders.composite("composite_buckets", buckets) + .size(helper.queryBucketSize); if (subBuilder != null) { compositeBuilder.subAggregations(subBuilder); } @@ -763,7 +764,7 @@ private static ValuesSourceAggregationBuilder createTermsAggregationBuilder( helper.build( group, new TermsAggregationBuilder(bucketName) - .size(helper.bucketSize) + .size(helper.queryBucketSize) .order(BucketOrder.key(true))); return withValueTypeHint( sourceBuilder, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java index 7a5bc830d05..04af888cfdd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java @@ -12,11 +12,15 @@ import java.io.IOException; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.function.Consumer; import java.util.function.Function; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.TestOnly; import org.opensearch.action.search.*; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -28,6 +32,8 @@ import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; import org.opensearch.search.SearchModule; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.InternalComposite; import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.FieldSortBuilder; @@ -48,6 +54,7 @@ @Getter @ToString public class OpenSearchQueryRequest implements OpenSearchRequest { + private static final Logger LOG 
= LogManager.getLogger(); /** {@link OpenSearchRequest.IndexName}. */ private final IndexName indexName; @@ -62,51 +69,63 @@ public class OpenSearchQueryRequest implements OpenSearchRequest { /** List of includes expected in the response. */ @EqualsAndHashCode.Exclude @ToString.Exclude private final List includes; - @EqualsAndHashCode.Exclude private boolean needClean = true; + @EqualsAndHashCode.Exclude @ToString.Exclude private boolean needClean = true; /** Indicate the search already done. */ - private boolean searchDone = false; + @EqualsAndHashCode.Exclude @ToString.Exclude private boolean searchDone = false; private String pitId; - private TimeValue cursorKeepAlive; + private final TimeValue cursorKeepAlive; private Object[] searchAfter; private SearchResponse searchResponse = null; - /** Constructor of OpenSearchQueryRequest. */ - public OpenSearchQueryRequest( - String indexName, int size, OpenSearchExprValueFactory factory, List includes) { - this(new IndexName(indexName), size, factory, includes); - } + @ToString.Exclude private Map afterKey; - /** Constructor of OpenSearchQueryRequest. */ - public OpenSearchQueryRequest( - IndexName indexName, int size, OpenSearchExprValueFactory factory, List includes) { - this.indexName = indexName; - this.sourceBuilder = new SearchSourceBuilder(); + @TestOnly + static OpenSearchQueryRequest of( + String indexName, int size, OpenSearchExprValueFactory factory, List includes) { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); sourceBuilder.from(0); sourceBuilder.size(size); sourceBuilder.timeout(DEFAULT_QUERY_TIMEOUT); - this.exprValueFactory = factory; - this.includes = includes; + return of(indexName, sourceBuilder, factory, includes); } - /** Constructor of OpenSearchQueryRequest. */ - public OpenSearchQueryRequest( + @TestOnly + static OpenSearchQueryRequest of( + String indexName, + SearchSourceBuilder sourceBuilder, + OpenSearchExprValueFactory factory, + List includes) { + return OpenSearchQueryRequest.of(new IndexName(indexName), sourceBuilder, factory, includes); + } + + /** Build an OpenSearchQueryRequest without PIT support. */ + public static OpenSearchQueryRequest of( IndexName indexName, SearchSourceBuilder sourceBuilder, OpenSearchExprValueFactory factory, List includes) { - this.indexName = indexName; - this.sourceBuilder = sourceBuilder; - this.exprValueFactory = factory; - this.includes = includes; + return new OpenSearchQueryRequest(indexName, sourceBuilder, factory, includes, null, null); } - /** Constructor of OpenSearchQueryRequest with PIT support. */ - public OpenSearchQueryRequest( + /** Build an OpenSearchQueryRequest with PIT support. */ + public static OpenSearchQueryRequest pitOf( + IndexName indexName, + SearchSourceBuilder sourceBuilder, + OpenSearchExprValueFactory factory, + List includes, + TimeValue cursorKeepAlive, + String pitId) { + return new OpenSearchQueryRequest( + indexName, sourceBuilder, factory, includes, cursorKeepAlive, pitId); + } + + /** Do not new it directly, use of() and pitOf() instead. 
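+ * The package-private constructor below keeps construction funneled through these
+ * factories, so a non-PIT request never carries a keep-alive or PIT id. A minimal usage
+ * sketch (argument names as in the factory signatures above):
+ *
+ * <pre>
+ *   OpenSearchRequest plain = OpenSearchQueryRequest.of(indexName, sourceBuilder, factory, includes);
+ *   OpenSearchRequest paged = OpenSearchQueryRequest.pitOf(indexName, sourceBuilder, factory, includes, cursorKeepAlive, pitId);
+ * </pre>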
*/ + OpenSearchQueryRequest( IndexName indexName, SearchSourceBuilder sourceBuilder, OpenSearchExprValueFactory factory, @@ -171,27 +190,56 @@ public OpenSearchResponse search( Function searchAction, Function scrollAction) { if (this.pitId == null) { - // When SearchRequest doesn't contain PitId, fetch single page request - if (searchDone) { - return new OpenSearchResponse( - SearchHits.empty(), exprValueFactory, includes, isCountAggRequest()); - } else { - // get the value before set searchDone = true - boolean isCountAggRequest = isCountAggRequest(); - searchDone = true; - return new OpenSearchResponse( - searchAction.apply( - new SearchRequest().indices(indexName.getIndexNames()).source(sourceBuilder)), - exprValueFactory, - includes, - isCountAggRequest); - } + return search(searchAction); } else { // Search with PIT instead of scroll API return searchWithPIT(searchAction); } } + private OpenSearchResponse search(Function searchAction) { + OpenSearchResponse openSearchResponse; + if (searchDone) { + openSearchResponse = + new OpenSearchResponse( + SearchHits.empty(), exprValueFactory, includes, isCountAggRequest()); + } else { + // Set afterKey on the request; it is null for the first round. + if (this.sourceBuilder.aggregations() != null) { + this.sourceBuilder.aggregations().getAggregatorFactories().stream() + .filter(a -> a instanceof CompositeAggregationBuilder) + .forEach(c -> ((CompositeAggregationBuilder) c).aggregateAfter(afterKey)); + if (LOG.isDebugEnabled()) { + LOG.debug(sourceBuilder); + } + } + + SearchRequest searchRequest = + new SearchRequest().indices(indexName.getIndexNames()).source(this.sourceBuilder); + this.searchResponse = searchAction.apply(searchRequest); + + openSearchResponse = + new OpenSearchResponse( + this.searchResponse, exprValueFactory, includes, isCountAggRequest()); + + // Update afterKey from the response. + if (openSearchResponse.isAggregationResponse()) { + openSearchResponse.getAggregations().asList().stream() + .filter(a -> a instanceof InternalComposite) + .forEach(c -> afterKey = ((InternalComposite) c).afterKey()); + } + if (afterKey != null) { + // For composite aggregations, searchDone is determined by the response result. + searchDone = openSearchResponse.isEmpty(); + } else { + // Directly set searchDone to true for non-composite aggregations.
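+ // (a single-shot metric or terms aggregation returns its whole result in one
+ // response, so there is never a second page to fetch)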
+ searchDone = true; + } + needClean = searchDone; + } + return openSearchResponse; + } + public OpenSearchResponse searchWithPIT(Function searchAction) { OpenSearchResponse openSearchResponse; if (searchDone) { @@ -237,6 +285,9 @@ public OpenSearchResponse searchWithPIT(Function if (searchHits != null && searchHits.length > 0) { searchAfter = searchHits[searchHits.length - 1].getSortValues(); this.sourceBuilder.searchAfter(searchAfter); + if (LOG.isDebugEnabled()) { + LOG.debug(sourceBuilder); + } } } return openSearchResponse; @@ -252,6 +303,8 @@ public void clean(Consumer cleanAction) { } } finally { this.pitId = null; + this.searchAfter = null; + this.afterKey = null; } } @@ -264,6 +317,8 @@ public void forceClean(Consumer cleanAction) { } } finally { this.pitId = null; + this.searchAfter = null; + this.afterKey = null; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 5c3ecd11e49..435cef22ef4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -24,6 +24,7 @@ import lombok.ToString; import org.apache.commons.lang3.tuple.Pair; import org.apache.lucene.search.join.ScoreMode; +import org.jetbrains.annotations.TestOnly; import org.opensearch.action.search.CreatePitRequest; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.BoolQueryBuilder; @@ -71,9 +72,7 @@ public class OpenSearchRequestBuilder { private int startFrom = 0; - @ToString.Exclude private final Settings settings; - - @ToString.Exclude private boolean topHitsAgg = false; + @EqualsAndHashCode.Exclude @ToString.Exclude private final Settings settings; public static class PushDownUnSupportedException extends RuntimeException { public PushDownUnSupportedException(String message) { @@ -98,7 +97,10 @@ public OpenSearchRequestBuilder( * Build DSL request. * * @return query request with PIT or scroll request + * @deprecated for testing only now. */ + @TestOnly + @Deprecated public OpenSearchRequest build( OpenSearchRequest.IndexName indexName, TimeValue cursorKeepAlive, OpenSearchClient client) { return build(indexName, cursorKeepAlive, client, false); @@ -114,7 +116,7 @@ public OpenSearchRequest build( * 2. If mapping is empty. It means no data in the index. PIT search relies on `_id` fields to do sort, thus it will fail if using PIT search in this case. 
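+ * In either case the builder safely falls back to a plain, single-shot query request.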
*/ if (sourceBuilder.size() == 0 || isMappingEmpty) { - return new OpenSearchQueryRequest(indexName, sourceBuilder, exprValueFactory, List.of()); + return OpenSearchQueryRequest.of(indexName, sourceBuilder, exprValueFactory, List.of()); } return buildRequestWithPit(indexName, cursorKeepAlive, client); } @@ -130,13 +132,13 @@ private OpenSearchRequest buildRequestWithPit( sourceBuilder.size(maxResultWindow - startFrom); // Search with PIT request String pitId = createPit(indexName, cursorKeepAlive, client); - return new OpenSearchQueryRequest( + return OpenSearchQueryRequest.pitOf( indexName, sourceBuilder, exprValueFactory, includes, cursorKeepAlive, pitId); } else { sourceBuilder.from(startFrom); sourceBuilder.size(size); // Search with non-Pit request - return new OpenSearchQueryRequest(indexName, sourceBuilder, exprValueFactory, includes); + return OpenSearchQueryRequest.of(indexName, sourceBuilder, exprValueFactory, includes); } } else { if (startFrom != 0) { @@ -145,7 +147,7 @@ private OpenSearchRequest buildRequestWithPit( sourceBuilder.size(pageSize); // Search with PIT request String pitId = createPit(indexName, cursorKeepAlive, client); - return new OpenSearchQueryRequest( + return OpenSearchQueryRequest.pitOf( indexName, sourceBuilder, exprValueFactory, includes, cursorKeepAlive, pitId); } } @@ -239,6 +241,16 @@ public void pushDownSortSuppliers(List>> sortBuilderSupp } } + /** Push down the limit into requestedTotalSize for paginated aggregations. */ + public void pushDownLimitToRequestTotal(Integer limit, Integer offset) { + requestedTotalSize = Math.min(requestedTotalSize, limit + offset); + } + + /** Reset requestedTotalSize once the composite aggregation has been converted to other aggregation types. */ + public void resetRequestTotal() { + requestedTotalSize = Integer.MAX_VALUE; + } + public void pushDownLimit(Integer limit, Integer offset) { // If there are multiple limit, we take the minimum among them - // E.g. 
for `source=t | head 10 | head 5`, we take 5 diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java index be5c1f0baf9..4cf9a868116 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java @@ -60,7 +60,7 @@ public class OpenSearchScrollRequest implements OpenSearchRequest { public static final String NO_SCROLL_ID = ""; - @EqualsAndHashCode.Exclude private boolean needClean = true; + @EqualsAndHashCode.Exclude @ToString.Exclude private boolean needClean = true; @Getter @EqualsAndHashCode.Exclude @ToString.Exclude private final List includes; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index e35c7efcf7b..0a47dc64a5e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -20,12 +20,15 @@ import java.util.Map; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; +import lombok.Getter; import lombok.ToString; +import org.jetbrains.annotations.TestOnly; import org.opensearch.action.search.SearchResponse; import org.opensearch.core.common.text.Text; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.search.aggregations.bucket.composite.InternalComposite; import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprLongValue; import org.opensearch.sql.data.model.ExprStringValue; @@ -39,11 +42,13 @@ @ToString public class OpenSearchResponse implements Iterable { + public static final OpenSearchResponse EMPTY = empty(); + /** Search query result (non-aggregation). */ private final SearchHits hits; /** Search aggregation result. */ - private final Aggregations aggregations; + @Getter private final Aggregations aggregations; /** List of requested include fields. */ private final List includes; @@ -53,12 +58,17 @@ public class OpenSearchResponse implements Iterable { /** OpenSearchExprValueFactory used to build ExprValue from search result. */ @EqualsAndHashCode.Exclude private final OpenSearchExprValueFactory exprValueFactory; - /** Constructor of OpenSearchResponse. */ - public OpenSearchResponse( + /** The empty OpenSearchResponse which is used for invalid search. */ + private static OpenSearchResponse empty() { + return new OpenSearchResponse(SearchHits.empty(), null, List.of(), false); + } + + @TestOnly + public static OpenSearchResponse of( SearchResponse searchResponse, OpenSearchExprValueFactory exprValueFactory, List includes) { - this(searchResponse, exprValueFactory, includes, false); + return new OpenSearchResponse(searchResponse, exprValueFactory, includes, false); } /** Constructor of OpenSearchResponse. 
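+ * The four-argument form carries the isCountAgg flag; when it is set, isEmpty() below
+ * inspects the total-hits count rather than the hit array.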
*/ @@ -94,22 +104,43 @@ public OpenSearchResponse( * @return true for empty */ public boolean isEmpty() { - return (hits.getHits() == null) - || (((hits.getHits().length == 0) && aggregations == null) - && (!isCountAgg - || hits.getTotalHits() == null)); // check total hits if is count aggregation + if (isCountResponse()) { + return hits.getTotalHits() == null; + } else if (isAggregationResponse()) { + return aggregations.asList().isEmpty(); + } else { + return getHitsSize() == 0; + } } public boolean isAggregationResponse() { return aggregations != null; } + public boolean isCompositeAggregationResponse() { + return isAggregationResponse() + && !aggregations.asList().isEmpty() + && (aggregations.asList().get(0) instanceof InternalComposite); + } + + /** + * Get the number of buckets in the composite aggregation. Must only be called when + * isCompositeAggregationResponse() returns true. + */ + public int getCompositeBucketSize() { + if (isCompositeAggregationResponse()) { + return ((InternalComposite) aggregations.asList().get(0)).getBuckets().size(); + } + assert false : "Should never be called here"; + return -1; + } + public boolean isCountResponse() { return isCountAgg; } public int getHitsSize() { - return hits.getHits() == null ? 0 : hits.getHits().length; + return hits == null ? 0 : hits.getHits() == null ? 0 : hits.getHits().length; } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index ab9ec64d102..d7539312cd1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -5,6 +5,8 @@ package org.opensearch.sql.opensearch.storage; +import static org.opensearch.search.aggregations.MultiBucketConsumerService.DEFAULT_MAX_BUCKETS; + import com.google.common.annotations.VisibleForTesting; import java.util.HashMap; import java.util.LinkedHashMap; @@ -198,10 +200,16 @@ public Integer getMaxResultWindow() { return cachedMaxResultWindow; } - public Integer getBucketSize() { - return Math.min( - settings.getSettingValue(Settings.Key.QUERY_BUCKET_SIZE), - settings.getSettingValue(Settings.Key.SEARCH_MAX_BUCKETS)); + public Integer getQueryBucketSize() { + return Math.min(settings.getSettingValue(Settings.Key.QUERY_BUCKET_SIZE), getMaxBuckets()); + } + + public Integer getMaxBuckets() { + try { + return settings.getSettingValue(Settings.Key.SEARCH_MAX_BUCKETS); + } catch (Exception e) { + return DEFAULT_MAX_BUCKETS; + } } /** TODO: Push down operations to index scan operator as much as possible in future. 
*/ @@ -293,6 +301,7 @@ public OpenSearchResourceMonitor createOpenSearchResourceMonitor() { return new OpenSearchResourceMonitor(getSettings(), new OpenSearchMemoryHealthy(settings)); } + /** The v3 API to build an OpenSearchRequest, called by CalciteEnumerableIndexScan. */ public OpenSearchRequest buildRequest(OpenSearchRequestBuilder requestBuilder) { final TimeValue cursorKeepAlive = settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE); return requestBuilder.build( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java index 80ad06422e2..a04897ce9b4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/AbstractCalciteIndexScan.java @@ -9,7 +9,6 @@ import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -125,7 +124,7 @@ public double estimateRowCount(RelMetadataQuery mq) { case AGGREGATION -> mq.getRowCount((RelNode) operation.digest()); case PROJECT, SORT, SORT_EXPR -> rowCount; case SORT_AGG_METRICS -> - NumberUtil.min(rowCount, osIndex.getBucketSize().doubleValue()); + NumberUtil.min(rowCount, osIndex.getQueryBucketSize().doubleValue()); // Refer the org.apache.calcite.rel.metadata.RelMdRowCount case FILTER, SCRIPT -> NumberUtil.multiply( @@ -265,35 +264,6 @@ protected List getCollationNames(List collations) { .toList(); } - /** - * Check if all sort-by collations equal aggregators that are pushed down. E.g. In `stats avg(age) - * as avg_age, sum(age) as sum_age by state | sort avg_age, sum_age`, the sort keys `avg_age`, - * `sum_age` which equal the pushed down aggregators `avg(age)`, `sum(age)`. - * - * @param collations List of collation names to check against aggregators. - * @return True if all collation names match all aggregator output, false otherwise. - */ - protected boolean isAllCollationNamesEqualAggregators(List collations) { - Stream aggregates = - pushDownContext.stream() - .filter(action -> action.type() == PushDownType.AGGREGATION) - .map(action -> ((LogicalAggregate) action.digest())); - return aggregates - .map(aggregate -> isAllCollationNamesEqualAggregators(aggregate, collations)) - .reduce(false, Boolean::logicalOr); - } - - private boolean isAllCollationNamesEqualAggregators( - LogicalAggregate aggregate, List collations) { - List fieldNames = aggregate.getRowType().getFieldNames(); - // The output fields of the aggregate are in the format of - // [...grouping fields, ...aggregator fields], so we set an offset to skip - // the grouping fields. - int groupOffset = aggregate.getGroupSet().cardinality(); - List fieldsWithoutGrouping = fieldNames.subList(groupOffset, fieldNames.size()); - return new HashSet<>(collations).equals(new HashSet<>(fieldsWithoutGrouping)); - } - /** * Check if any sort-by collations is in aggregators that are pushed down. E.g. In `stats avg(age) * as avg_age by state | sort avg_age`, the sort clause has `avg_age` which is an aggregator. 
The diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java index 4019346e055..a0b33f3b541 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScanner.java @@ -13,6 +13,8 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import javax.annotation.Nullable; +import org.opensearch.OpenSearchException; +import org.opensearch.OpenSearchSecurityException; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.exception.NonFallbackCalciteException; import org.opensearch.sql.opensearch.client.OpenSearchClient; @@ -63,9 +65,14 @@ public class BackgroundSearchScanner { @Nullable private final Executor backgroundExecutor; private CompletableFuture nextBatchFuture = null; private boolean stopIteration = false; + private final int maxResultWindow; + private final int queryBucketSize; - public BackgroundSearchScanner(OpenSearchClient client) { + public BackgroundSearchScanner( + OpenSearchClient client, int maxResultWindow, int queryBucketSize) { this.client = client; + this.maxResultWindow = maxResultWindow; + this.queryBucketSize = queryBucketSize; // We can only actually do the background operation if we have the ability to access the thread // pool. Otherwise, fallback to synchronous fetch. if (client.getNodeClient().isPresent()) { @@ -104,11 +111,26 @@ private OpenSearchResponse getCurrentResponse(OpenSearchRequest request) { if (isAsync()) { try { return nextBatchFuture.get(); + } catch (OpenSearchSecurityException e) { + throw e; } catch (InterruptedException | ExecutionException e) { + if (e.getCause() instanceof OpenSearchSecurityException) { + throw (OpenSearchSecurityException) e.getCause(); + } + if (e.getCause() instanceof OpenSearchException) { + if (((OpenSearchException) e.getCause()).getRootCause() + instanceof ArrayIndexOutOfBoundsException) { + // It can be caused by searching the CompositeAggregator with the last afterKey, + // which does not exist in the index. + // In this case, we can safely ignore this exception. + return OpenSearchResponse.EMPTY; + } + } throw new NonFallbackCalciteException( "Failed to fetch data from the index: the background task failed or interrupted.\n" + " Inner error: " - + e.getMessage()); + + e.getMessage(), + e); } } else { return client.search(request); } } @@ -120,18 +142,23 @@ private OpenSearchResponse getCurrentResponse(OpenSearchRequest request) { * * also trigger the next background fetch. 
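+ * Stop conditions mirror the logic below: a count response is always single-shot; a
+ * composite aggregation stops once a page returns fewer buckets than queryBucketSize;
+ * any other aggregation stops after one response; a plain search stops when a page has
+ * fewer hits than maxResultWindow.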
* * @param request The OpenSearch request to execute - * @param maxResultWindow Maximum number of results to fetch per batch * @return SearchBatchResult containing the current batch's iterator and completion status * @throws NonFallbackCalciteException if the background fetch fails or is interrupted */ - public SearchBatchResult fetchNextBatch(OpenSearchRequest request, int maxResultWindow) { + public SearchBatchResult fetchNextBatch(OpenSearchRequest request) { OpenSearchResponse response = getCurrentResponse(request); // Determine if we need future batches - if (response.isAggregationResponse() - || response.isCountResponse() - || response.getHitsSize() < maxResultWindow) { + if (response.isCountResponse()) { stopIteration = true; + } else if (response.isCompositeAggregationResponse()) { + // For composite aggregations, if we get fewer buckets than requested, we're done + stopIteration = response.getCompositeBucketSize() < queryBucketSize; + } else if (response.isAggregationResponse()) { + stopIteration = true; + } else { + // For regular search results, if we get fewer hits than maxResultWindow, we're done + stopIteration = response.getHitsSize() < maxResultWindow; } Iterator iterator; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java index ddf14cfee38..b00b00a8f17 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableIndexScan.java @@ -118,6 +118,7 @@ public Enumerator enumerator() { getRowType().getFieldNames(), requestBuilder.getMaxResponseSize(), requestBuilder.getMaxResultWindow(), + osIndex.getQueryBucketSize(), osIndex.buildRequest(requestBuilder), osIndex.createOpenSearchResourceMonitor()); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 4aaac41cc75..cd9d42b8955 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -315,13 +315,15 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { return null; } CalciteLogicalIndexScan newScan = copyWithNewTraitSet(sort.getTraitSet()); - AbstractAction newAction = - (AggregationBuilderAction) - aggAction -> - aggAction.rePushDownSortAggMeasure( - sort.getCollation().getFieldCollations(), rowType.getFieldNames()); + newScan + .pushDownContext + .getAggPushDownAction() + .rePushDownSortAggMeasure( + sort.getCollation().getFieldCollations(), rowType.getFieldNames()); + AbstractAction action = + (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); Object digest = sort.getCollation().getFieldCollations(); - newScan.pushDownContext.add(PushDownType.SORT_AGG_METRICS, digest, newAction); + newScan.pushDownContext.add(PushDownType.SORT_AGG_METRICS, digest, action); return newScan; } catch (Exception e) { if (LOG.isDebugEnabled()) { @@ -334,9 +336,10 @@ public CalciteLogicalIndexScan pushDownSortAggregateMeasure(Sort sort) { public CalciteLogicalIndexScan pushDownRareTop(Project project, RareTopDigest digest) { try { CalciteLogicalIndexScan newScan = 
copyWithNewSchema(project.getRowType()); - AbstractAction newAction = - (AggregationBuilderAction) aggAction -> aggAction.rePushDownRareTop(digest); - newScan.pushDownContext.add(PushDownType.RARE_TOP, digest, newAction); + newScan.pushDownContext.getAggPushDownAction().rePushDownRareTop(digest); + AbstractAction action = + (OSRequestBuilderAction) requestAction -> requestAction.resetRequestTotal(); + newScan.pushDownContext.add(PushDownType.RARE_TOP, digest, action); return newScan; } catch (Exception e) { if (LOG.isDebugEnabled()) { @@ -364,7 +367,7 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, Project project) { .filter(entry -> schema.contains(entry.getKey())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); List outputFields = aggregate.getRowType().getFieldNames(); - int bucketSize = osIndex.getBucketSize(); + int queryBucketSize = osIndex.getQueryBucketSize(); boolean bucketNullable = Boolean.parseBoolean( aggregate.getHints().stream() @@ -374,7 +377,7 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, Project project) { .orElseGet(() -> "true")); AggregateAnalyzer.AggregateBuilderHelper helper = new AggregateAnalyzer.AggregateBuilderHelper( - getRowType(), fieldTypes, getCluster(), bucketNullable, bucketSize); + getRowType(), fieldTypes, getCluster(), bucketNullable, queryBucketSize); final Pair, OpenSearchAggregationResponseParser> builderAndParser = AggregateAnalyzer.analyze(aggregate, project, outputFields, helper); Map extendedTypeMapping = @@ -406,17 +409,24 @@ public AbstractRelNode pushDownLimit(LogicalSort sort, Integer limit, Integer of if (pushDownContext.isAggregatePushed()) { // Push down the limit into the aggregation bucket in advance to detect whether the limit // can update the aggregation builder - boolean updated = + boolean canUpdate = pushDownContext.getAggPushDownAction().pushDownLimitIntoBucketSize(limit + offset); - if (!updated && offset > 0) return null; + if (!canUpdate && offset > 0) return null; CalciteLogicalIndexScan newScan = this.copyWithNewSchema(getRowType()); - // Simplify the action if it doesn't update the aggregation builder, otherwise keep the - // original action - // It won't change the aggregation builder by do this action again since it's idempotent - AggregationBuilderAction action = - updated - ? aggAction -> aggAction.pushDownLimitIntoBucketSize(limit + offset) - : aggAction -> {}; + if (canUpdate) { + newScan + .pushDownContext + .getAggPushDownAction() + .pushDownLimitIntoBucketSize(limit + offset); + } + AbstractAction action; + if (pushDownContext.getAggPushDownAction().isCompositeAggregation()) { + action = + (OSRequestBuilderAction) + requestBuilder -> requestBuilder.pushDownLimitToRequestTotal(limit, offset); + } else { + action = (AggregationBuilderAction) aggAction -> {}; + } newScan.pushDownContext.add(PushDownType.LIMIT, new LimitDigest(limit, offset), action); return offset > 0 ? 
sort.copy(sort.getTraitSet(), List.of(newScan)) : newScan; } else { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java index e684d128914..05bd00dcf2c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexEnumerator.java @@ -40,9 +40,6 @@ public class OpenSearchIndexEnumerator implements Enumerator { /** Largest number of rows allowed in the response. */ @EqualsAndHashCode.Include @ToString.Include private final int maxResponseSize; - /** Largest number of rows allowed in the response. */ - @EqualsAndHashCode.Include @ToString.Include private final int maxResultWindow; - /** How many moveNext() calls to perform resource check once. */ private static final long NUMBER_OF_NEXT_CALL_TO_CHECK = 1000; @@ -62,6 +59,7 @@ public OpenSearchIndexEnumerator( List fields, int maxResponseSize, int maxResultWindow, + int queryBucketSize, OpenSearchRequest request, ResourceMonitor monitor) { if (!monitor.isHealthy()) { @@ -71,16 +69,14 @@ public OpenSearchIndexEnumerator( this.fields = fields; this.request = request; this.maxResponseSize = maxResponseSize; - this.maxResultWindow = maxResultWindow; this.monitor = monitor; this.client = client; - this.bgScanner = new BackgroundSearchScanner(client); + this.bgScanner = new BackgroundSearchScanner(client, maxResultWindow, queryBucketSize); this.bgScanner.startScanning(request); } private Iterator fetchNextBatch() { - BackgroundSearchScanner.SearchBatchResult result = - bgScanner.fetchNextBatch(request, maxResultWindow); + BackgroundSearchScanner.SearchBatchResult result = bgScanner.fetchNextBatch(request); return result.iterator(); } @@ -125,7 +121,7 @@ public boolean moveNext() { @Override public void reset() { bgScanner.reset(request); - iterator = bgScanner.fetchNextBatch(request, maxResultWindow).iterator(); + iterator = bgScanner.fetchNextBatch(request).iterator(); queryCount = 0; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java index 5d96caff9f9..81595cc56fd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggPushDownAction.java @@ -5,6 +5,8 @@ package org.opensearch.sql.opensearch.storage.scan.context; +import static org.opensearch.search.aggregations.MultiBucketConsumerService.DEFAULT_MAX_BUCKETS; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -43,8 +45,6 @@ @Getter @EqualsAndHashCode public class AggPushDownAction implements OSRequestBuilderAction { - private static final int MAX_BUCKET_SIZE = 65535; - private Pair, OpenSearchAggregationResponseParser> builderAndParser; private final Map extendedTypeMapping; private final long scriptCount; @@ -186,13 +186,13 @@ public void rePushDownRareTop(RareTopDigest digest) { for (int i = 0; i < composite.sources().size(); i++) { TermsValuesSourceBuilder terms = (TermsValuesSourceBuilder) composite.sources().get(i); if (i == 0) { // first - aggregationBuilder = buildTermsAggregationBuilder(terms, null, MAX_BUCKET_SIZE); + aggregationBuilder = 
buildTermsAggregationBuilder(terms, null, DEFAULT_MAX_BUCKETS); } else if (i == composite.sources().size() - 1) { // last aggregationBuilder.subAggregation( buildTermsAggregationBuilder(terms, bucketOrder, digest.number())); } else { aggregationBuilder.subAggregation( - buildTermsAggregationBuilder(terms, null, MAX_BUCKET_SIZE)); + buildTermsAggregationBuilder(terms, null, DEFAULT_MAX_BUCKETS)); } } } else { @@ -390,6 +390,11 @@ public void pushDownSortIntoAggBucket( // TODO for MultiTermsAggregationBuilder } + public boolean isCompositeAggregation() { + return builderAndParser.getLeft().stream() + .anyMatch(builder -> builder instanceof CompositeAggregationBuilder); + } + /** * Check if the limit can be pushed down into aggregation bucket when the limit size is less than * bucket number. diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java index f2362e3c1cc..f9f43c89a7b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/AggregationBuilderAction.java @@ -11,6 +11,6 @@ default void pushOperation(PushDownContext context, PushDownOperation operation) // Apply transformation to aggregation builder in the optimization phase as some transformation // may cause exception. We need to detect that exception in advance. apply(context.getAggPushDownAction()); - context.getOperationsForAgg().add(operation); + context.addOperationForAgg(operation); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java index 6abe97266f7..d3a762c22d8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/OSRequestBuilderAction.java @@ -10,6 +10,6 @@ /** A lambda action to apply on the {@link OpenSearchRequestBuilder} */ public interface OSRequestBuilderAction extends AbstractAction { default void pushOperation(PushDownContext context, PushDownOperation operation) { - context.getOperationsForRequestBuilder().add(operation); + context.addOperationForRequestBuilder(operation); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java index 16c81facace..4f84746ade4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/context/PushDownContext.java @@ -5,10 +5,8 @@ package org.opensearch.sql.opensearch.storage.scan.context; -import com.google.common.collect.Iterators; import java.util.AbstractCollection; import java.util.ArrayDeque; -import java.util.Collections; import java.util.Iterator; import lombok.Getter; import org.jetbrains.annotations.NotNull; @@ -20,6 +18,7 @@ @Getter public class PushDownContext extends AbstractCollection { private final OpenSearchIndex osIndex; + private ArrayDeque queue = new ArrayDeque<>(); private ArrayDeque operationsForRequestBuilder; private boolean isAggregatePushed = false; @@ -67,33 
+66,28 @@ public PushDownContext cloneWithoutSort() { @NotNull @Override public Iterator iterator() { - if (operationsForRequestBuilder == null) { - return Collections.emptyIterator(); - } else if (operationsForAgg == null) { - return operationsForRequestBuilder.iterator(); - } else { - return Iterators.concat(operationsForRequestBuilder.iterator(), operationsForAgg.iterator()); - } + return queue.iterator(); } @Override public int size() { - return (operationsForRequestBuilder == null ? 0 : operationsForRequestBuilder.size()) - + (operationsForAgg == null ? 0 : operationsForAgg.size()); + return queue.size(); } - ArrayDeque getOperationsForRequestBuilder() { + void addOperationForRequestBuilder(PushDownOperation operation) { if (operationsForRequestBuilder == null) { this.operationsForRequestBuilder = new ArrayDeque<>(); } - return operationsForRequestBuilder; + operationsForRequestBuilder.add(operation); + queue.add(operation); } - ArrayDeque getOperationsForAgg() { + void addOperationForAgg(PushDownOperation operation) { if (operationsForAgg == null) { this.operationsForAgg = new ArrayDeque<>(); } - return operationsForAgg; + operationsForAgg.add(operation); + queue.add(operation); } @Override diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java index 9b6da17567e..81261aa7a70 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java @@ -409,7 +409,7 @@ void cleanup_rethrows_exception() { @SneakyThrows void cleanup_pit_request() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new SearchSourceBuilder(), factory, diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index f93da87c303..afd210da1ff 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -435,7 +435,7 @@ void cleanup_with_IOException() { @SneakyThrows void cleanup_pit_request() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new SearchSourceBuilder(), factory, @@ -453,7 +453,7 @@ void cleanup_pit_request() { void cleanup_pit_request_throw_exception() { when(restClient.deletePit(any(), any())).thenThrow(new IOException()); OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new SearchSourceBuilder(), factory, diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java index 5ce83c66ff8..5fee9503ca6 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java @@ -60,10 +60,10 @@ public class OpenSearchQueryRequestTest { @Mock private OpenSearchExprValueFactory factory; private final OpenSearchQueryRequest 
request = - new OpenSearchQueryRequest("test", 200, factory, List.of()); + OpenSearchQueryRequest.of("test", 200, factory, List.of()); private final OpenSearchQueryRequest remoteRequest = - new OpenSearchQueryRequest("ccs:test", 200, factory, List.of()); + OpenSearchQueryRequest.of("ccs:test", 200, factory, List.of()); @Mock private StreamOutput streamOutput; @Mock private StreamInput streamInput; @@ -88,7 +88,7 @@ void testWriteTo() throws IOException { sourceBuilderForSerializer.searchAfter(new Object[] {"value1", 123}); List includes = List.of("field1", "field2"); serializationRequest = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilderForSerializer, factory, @@ -120,7 +120,7 @@ void testWriteToWithoutSearchAfter() List includes = List.of("field1", "field2"); serializationRequest = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilderForSerializer, factory, @@ -139,7 +139,7 @@ void testWriteToWithoutSearchAfter() @Test void testWriteToWithoutPIT() { - serializationRequest = new OpenSearchQueryRequest("test", 200, factory, List.of()); + serializationRequest = OpenSearchQueryRequest.of("test", 200, factory, List.of()); UnsupportedOperationException exception = assertThrows( @@ -155,24 +155,21 @@ void testWriteToWithoutPIT() { @Test void search() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( - new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, List.of()); + OpenSearchQueryRequest.of("test", sourceBuilder, factory, List.of()); when(searchAction.apply(any())).thenReturn(searchResponse); when(searchResponse.getHits()).thenReturn(searchHits); when(searchHits.getHits()).thenReturn(new SearchHit[] {searchHit}); - OpenSearchResponse searchResponse = request.search(searchAction, scrollAction); - assertFalse(searchResponse.isEmpty()); - searchResponse = request.search(searchAction, scrollAction); - assertTrue(searchResponse.isEmpty()); + OpenSearchResponse response = request.search(searchAction, scrollAction); + assertFalse(response.isEmpty()); verify(searchAction, times(1)).apply(any()); } @Test void search_with_pit() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -203,7 +200,7 @@ void search_with_pit() { @Test void search_with_pit_hits_null() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -224,7 +221,7 @@ void search_with_pit_hits_empty() { SearchResponse searchResponse = mock(SearchResponse.class); SearchHits searchHits = mock(SearchHits.class); OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -245,7 +242,7 @@ void search_with_pit_null() { SearchResponse searchResponse = mock(SearchResponse.class); SearchHits searchHits = mock(SearchHits.class); OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -264,7 +261,7 @@ void search_with_pit_null() { @Test void has_another_batch() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -277,7 +274,7 @@ void has_another_batch() { @Test 
void has_another_batch_pid_null() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -290,7 +287,7 @@ void has_another_batch_pid_null() { @Test void has_another_batch_need_clean() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -308,8 +305,7 @@ void has_another_batch_need_clean() { @Test void search_withoutContext() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( - new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, List.of()); + OpenSearchQueryRequest.of("test", sourceBuilder, factory, List.of()); when(searchAction.apply(any())).thenReturn(searchResponse); when(searchResponse.getHits()).thenReturn(searchHits); @@ -322,8 +318,7 @@ void search_withoutContext() { @Test void search_withIncludes() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( - new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, List.of()); + OpenSearchQueryRequest.of("test", sourceBuilder, factory, List.of()); String[] includes = {"_id", "_index"}; when(searchAction.apply(any())).thenReturn(searchResponse); @@ -333,9 +328,6 @@ void search_withIncludes() { OpenSearchResponse searchResponse = request.search(searchAction, scrollAction); assertFalse(searchResponse.isEmpty()); - searchResponse = request.search(searchAction, scrollAction); - assertTrue(searchResponse.isEmpty()); - verify(searchAction, times(1)).apply(any()); } @@ -348,7 +340,7 @@ void clean() { @Test void testCleanConditionTrue() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -371,7 +363,7 @@ void testCleanConditionTrue() { @Test void testCleanConditionFalse_needCleanFalse() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, @@ -393,7 +385,7 @@ void testCleanConditionFalse_needCleanFalse() { @Test void testCleanConditionFalse_pidNull() { OpenSearchQueryRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), sourceBuilder, factory, diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java index 8db9176fb5e..1f21f1e769e 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java @@ -96,8 +96,8 @@ void build_query_request() { requestBuilder.pushDownTrackedScore(true); assertEquals( - new OpenSearchQueryRequest( - new OpenSearchRequest.IndexName("test"), + OpenSearchQueryRequest.of( + "test", new SearchSourceBuilder() .from(offset) .size(limit) @@ -105,7 +105,7 @@ void build_query_request() { .trackScores(true), exprValueFactory, List.of()), - requestBuilder.build(indexName, DEFAULT_QUERY_TIMEOUT, client)); + requestBuilder.build(indexName, null, client)); } @Test @@ -127,7 +127,7 @@ void build_PIT_request_with_correct_size() { requestBuilder.pushDownPageSize(2); assertEquals( - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new 
SearchSourceBuilder().from(offset).size(2).timeout(DEFAULT_QUERY_TIMEOUT), exprValueFactory, @@ -146,7 +146,7 @@ void buildRequestWithPit_pageSizeNull_sizeGreaterThanMaxResultWindow() { requestBuilder.pushDownLimit(limit, offset); assertEquals( - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new SearchSourceBuilder() .from(offset) @@ -167,12 +167,12 @@ void buildRequestWithPit_pageSizeNull_sizeLessThanMaxResultWindow() { requestBuilder.pushDownLimit(limit, offset); assertEquals( - new OpenSearchQueryRequest( - new OpenSearchRequest.IndexName("test"), + OpenSearchQueryRequest.of( + "test", new SearchSourceBuilder().from(offset).size(limit).timeout(DEFAULT_QUERY_TIMEOUT), exprValueFactory, List.of()), - requestBuilder.build(indexName, DEFAULT_QUERY_TIMEOUT, client)); + requestBuilder.build(indexName, null, client)); } @Test @@ -185,7 +185,7 @@ void buildRequestWithPit_pageSizeNotNull_startFromZero() { when(client.createPit(any(CreatePitRequest.class))).thenReturn("samplePITId"); assertEquals( - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new SearchSourceBuilder().from(offset).size(pageSize).timeout(DEFAULT_QUERY_TIMEOUT), exprValueFactory, @@ -434,7 +434,7 @@ void test_push_down_project() { requestBuilder); assertEquals( - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new SearchSourceBuilder() .from(DEFAULT_OFFSET) @@ -469,8 +469,8 @@ void test_push_down_project_limit() { requestBuilder); assertEquals( - new OpenSearchQueryRequest( - new OpenSearchRequest.IndexName("test"), + OpenSearchQueryRequest.of( + "test", new SearchSourceBuilder() .from(offset) .size(limit) @@ -478,7 +478,7 @@ void test_push_down_project_limit() { .fetchSource("intA", null), exprValueFactory, List.of("intA")), - requestBuilder.build(indexName, DEFAULT_QUERY_TIMEOUT, client)); + requestBuilder.build(indexName, null, client)); } @Test @@ -499,8 +499,8 @@ void test_push_down_project_limit_and_offset() { requestBuilder); assertEquals( - new OpenSearchQueryRequest( - new OpenSearchRequest.IndexName("test"), + OpenSearchQueryRequest.of( + "test", new SearchSourceBuilder() .from(offset) .size(limit) @@ -508,7 +508,7 @@ void test_push_down_project_limit_and_offset() { .fetchSource("intA", null), exprValueFactory, List.of("intA")), - requestBuilder.build(indexName, DEFAULT_QUERY_TIMEOUT, client)); + requestBuilder.build(indexName, null, client)); } @Test @@ -562,7 +562,7 @@ void test_push_down_project_with_alias_type() { requestBuilder); assertEquals( - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( new OpenSearchRequest.IndexName("test"), new SearchSourceBuilder() .from(DEFAULT_OFFSET) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java index 6c509c20c36..f9897f48dd2 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java @@ -5,7 +5,6 @@ package org.opensearch.sql.opensearch.response; -import static java.util.Collections.emptyList; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -35,6 +34,7 @@ import 
org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.fetch.subphase.highlight.HighlightField; import org.opensearch.sql.data.model.ExprFloatValue; @@ -79,24 +79,25 @@ void isEmpty() { new TotalHits(2L, TotalHits.Relation.EQUAL_TO), 1.0F)); - var response = new OpenSearchResponse(searchResponse, factory, includes); + var response = OpenSearchResponse.of(searchResponse, factory, includes); assertFalse(response.isEmpty()); when(searchResponse.getHits()).thenReturn(SearchHits.empty()); when(searchResponse.getAggregations()).thenReturn(null); - response = new OpenSearchResponse(searchResponse, factory, includes); + response = OpenSearchResponse.of(searchResponse, factory, includes); assertTrue(response.isEmpty()); when(searchResponse.getHits()) .thenReturn(new SearchHits(null, new TotalHits(0, TotalHits.Relation.EQUAL_TO), 0)); - response = new OpenSearchResponse(searchResponse, factory, includes); + response = OpenSearchResponse.of(searchResponse, factory, includes); assertTrue(response.isEmpty()); when(searchResponse.getHits()).thenReturn(SearchHits.empty()); - when(searchResponse.getAggregations()).thenReturn(new Aggregations(emptyList())); + when(searchResponse.getAggregations()) + .thenReturn(new Aggregations(List.of(mock(Aggregation.class)))); - response = new OpenSearchResponse(searchResponse, factory, includes); + response = OpenSearchResponse.of(searchResponse, factory, includes); assertFalse(response.isEmpty()); } @@ -118,7 +119,7 @@ void iterator() { .thenReturn(exprTupleValue2); int i = 0; - for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, List.of("id1"))) { + for (ExprValue hit : OpenSearchResponse.of(searchResponse, factory, List.of("id1"))) { if (i == 0) { assertEquals(exprTupleValue1.tupleValue().get("id"), hit.tupleValue().get("id")); } else if (i == 1) { @@ -168,7 +169,7 @@ void iterator_metafields() { List includes = List.of("id1", "_index", "_id", "_routing", "_sort", "_score", "_maxscore"); int i = 0; - for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, includes)) { + for (ExprValue hit : OpenSearchResponse.of(searchResponse, factory, includes)) { if (i == 0) { assertEquals(exprTupleResponse, hit); } else { @@ -199,7 +200,7 @@ void iterator_metafields_withoutIncludes() { ExprTupleValue exprTupleResponse = ExprTupleValue.fromExprValueMap(ImmutableMap.of("id1", new ExprIntegerValue(1))); int i = 0; - for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, includes)) { + for (ExprValue hit : OpenSearchResponse.of(searchResponse, factory, includes)) { if (i == 0) { assertEquals(exprTupleResponse, hit); } else { @@ -239,7 +240,7 @@ void iterator_metafields_scoreNaN() { "_id", new ExprStringValue("testId"), "_sort", new ExprLongValue(123456L))); int i = 0; - for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, includes)) { + for (ExprValue hit : OpenSearchResponse.of(searchResponse, factory, includes)) { if (i == 0) { assertEquals(exprTupleResponse, hit); } else { @@ -263,7 +264,7 @@ void iterator_with_inner_hits() { when(factory.construct(any(), anyBoolean())).thenReturn(exprTupleValue1); - for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, List.of("id1"))) { + for (ExprValue hit : OpenSearchResponse.of(searchResponse, factory, List.of("id1"))) { assertEquals(exprTupleValue1, hit); } } @@ 
-272,7 +273,7 @@ void iterator_with_inner_hits() { void response_is_aggregation_when_aggregation_not_empty() { when(searchResponse.getAggregations()).thenReturn(aggregations); - OpenSearchResponse response = new OpenSearchResponse(searchResponse, factory, includes); + OpenSearchResponse response = OpenSearchResponse.of(searchResponse, factory, includes); assertTrue(response.isAggregationResponse()); } @@ -280,7 +281,7 @@ void response_is_aggregation_when_aggregation_not_empty() { void response_isnot_aggregation_when_aggregation_is_empty() { when(searchResponse.getAggregations()).thenReturn(null); - OpenSearchResponse response = new OpenSearchResponse(searchResponse, factory, includes); + OpenSearchResponse response = OpenSearchResponse.of(searchResponse, factory, includes); assertFalse(response.isAggregationResponse()); } @@ -297,7 +298,7 @@ void aggregation_iterator() { .thenReturn(new ExprIntegerValue(2)); int i = 0; - for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, includes)) { + for (ExprValue hit : OpenSearchResponse.of(searchResponse, factory, includes)) { if (i == 0) { assertEquals(exprTupleValue1, hit); } else if (i == 1) { @@ -329,7 +330,7 @@ void highlight_iterator() { when(searchHit1.getHighlightFields()).thenReturn(highlightMap); when(factory.construct(any(), anyBoolean())).thenReturn(resultTuple); - for (ExprValue resultHit : new OpenSearchResponse(searchResponse, factory, includes)) { + for (ExprValue resultHit : OpenSearchResponse.of(searchResponse, factory, includes)) { var expected = ExprValueUtils.collectionValue( Arrays.stream(searchHit.getHighlightFields().get("highlights").getFragments()) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java index f4a7f297df9..c7531c37758 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/BackgroundSearchScannerTest.java @@ -22,6 +22,7 @@ import org.junit.jupiter.api.Test; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchQueryRequest; import org.opensearch.sql.opensearch.request.OpenSearchRequest; import org.opensearch.sql.opensearch.response.OpenSearchResponse; import org.opensearch.threadpool.ThreadPool; @@ -40,14 +41,14 @@ void setUp() { client = mock(OpenSearchClient.class); nodeClient = mock(NodeClient.class); threadPool = mock(ThreadPool.class); - request = mock(OpenSearchRequest.class); + request = mock(OpenSearchQueryRequest.class); executor = Executors.newSingleThreadExecutor(); when(client.getNodeClient()).thenReturn(Optional.of(nodeClient)); when(nodeClient.threadPool()).thenReturn(threadPool); when(threadPool.executor(any())).thenReturn(executor); - scanner = new BackgroundSearchScanner(client); + scanner = new BackgroundSearchScanner(client, 10, 10); } @Test @@ -55,13 +56,13 @@ void testSyncFallbackWhenNoNodeClient() { // Setup client without node client OpenSearchClient syncClient = mock(OpenSearchClient.class); when(syncClient.getNodeClient()).thenReturn(Optional.empty()); - scanner = new BackgroundSearchScanner(syncClient); + scanner = new BackgroundSearchScanner(syncClient, 10, 10); OpenSearchResponse response = mockResponse(false, false, 10); 
when(syncClient.search(request)).thenReturn(response); scanner.startScanning(request); - BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request, 10); + BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request); assertFalse( result.stopIteration(), "Expected iteration to continue after fetching one full page"); @@ -80,13 +81,13 @@ void testCompleteScanWithMultipleBatches() { scanner.startScanning(request); // First batch - BackgroundSearchScanner.SearchBatchResult result1 = scanner.fetchNextBatch(request, 10); + BackgroundSearchScanner.SearchBatchResult result1 = scanner.fetchNextBatch(request); assertFalse( result1.stopIteration(), "Expected iteration to continue after fetching 10/15 results"); assertTrue(result1.iterator().hasNext()); // Second batch - BackgroundSearchScanner.SearchBatchResult result2 = scanner.fetchNextBatch(request, 10); + BackgroundSearchScanner.SearchBatchResult result2 = scanner.fetchNextBatch(request); assertTrue(result2.stopIteration()); assertFalse(result2.iterator().hasNext()); } @@ -97,7 +98,7 @@ void testFetchOnceForAggregationResponse() { when(client.search(request)).thenReturn(response); scanner.startScanning(request); - BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request, 10); + BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request); assertTrue(scanner.isScanDone()); } @@ -108,7 +109,7 @@ void testFetchOnceWhenResultsBelowWindow() { when(client.search(request)).thenReturn(response); scanner.startScanning(request); - BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request, 10); + BackgroundSearchScanner.SearchBatchResult result = scanner.fetchNextBatch(request); assertTrue(scanner.isScanDone()); } @@ -121,8 +122,8 @@ void testReset() { when(client.search(request)).thenReturn(response1).thenReturn(response2); scanner.startScanning(request); - scanner.fetchNextBatch(request, 10); - scanner.fetchNextBatch(request, 10); + scanner.fetchNextBatch(request); + scanner.fetchNextBatch(request); assertTrue(scanner.isScanDone()); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java index 05d0c78ce31..fb511645d62 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java @@ -95,7 +95,7 @@ void dont_serialize_if_no_cursor() { OpenSearchRequest request = mock(); OpenSearchResponse response = mock(); when(builder.build(any(), any(), any())).thenReturn(request); - when(client.search(any())).thenReturn(response); + lenient().when(client.search(any())).thenReturn(response); try (var indexScan = new OpenSearchIndexScan(client, builder.build(INDEX_NAME, SCROLL_TIMEOUT, client))) { indexScan.open(); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java index 08d60b66a3a..88bc6e1190d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java @@ -119,11 +119,11 @@ void serialize_PIT(Integer 
numberOfIncludes) { .map(i -> "column" + i) .collect(Collectors.toList()); var request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( INDEX_NAME, searchSourceBuilder, factory, includes, CURSOR_KEEP_ALIVE, "samplePitId"); // make a response, so OpenSearchResponse::isEmpty would return true and unset needClean var response = mock(SearchResponse.class); - when(response.getAggregations()).thenReturn(mock()); + when(response.getAggregations()).thenReturn(null); var hits = mock(SearchHits.class); when(response.getHits()).thenReturn(hits); SearchHit hit = mock(SearchHit.class); @@ -396,7 +396,7 @@ PushDownAssertion shouldQueryHighlight(QueryBuilder query, HighlightBuilder high .size(MAX_RESULT_WINDOW) .highlighter(highlight); OpenSearchRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( EMPLOYEES_INDEX, sourceBuilder, factory, List.of(), CURSOR_KEEP_ALIVE, null); when(client.search(request)).thenReturn(response); @@ -415,7 +415,7 @@ PushDownAssertion shouldQuery(QueryBuilder expected) { .size(MAX_RESULT_WINDOW) .timeout(CURSOR_KEEP_ALIVE); OpenSearchRequest request = - new OpenSearchQueryRequest( + OpenSearchQueryRequest.pitOf( EMPLOYEES_INDEX, builder, factory, List.of(), CURSOR_KEEP_ALIVE, null); when(client.search(request)).thenReturn(response); var indexScan = diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java index 81597c65bbe..9258d7bf4db 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAggregationTest.java @@ -989,4 +989,85 @@ public void testSortAggregationMetrics2() { + "ORDER BY 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testHaving1() { + String ppl = "source=EMP | stats avg(SAL) as avg by DEPTNO | where avg > 0"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalFilter(condition=[>($0, 0)])\n" + + " LogicalProject(avg=[$1], DEPTNO=[$0])\n" + + " LogicalAggregate(group=[{0}], avg=[AVG($1)])\n" + + " LogicalProject(DEPTNO=[$7], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "avg=2175.; DEPTNO=20\navg=2916.666666; DEPTNO=10\navg=1566.666666; DEPTNO=30\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT AVG(`SAL`) `avg`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`) `t1`\n" + + "WHERE `avg` > 0"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testHaving2() { + String ppl = + "source=EMP | stats bucket_nullable = false avg(SAL) as avg by DEPTNO | where avg > 0"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalFilter(condition=[>($0, 0)])\n" + + " LogicalProject(avg=[$1], DEPTNO=[$0])\n" + + " LogicalAggregate(group=[{0}], avg=[AVG($1)])\n" + + " LogicalProject(DEPTNO=[$7], SAL=[$5])\n" + + " LogicalFilter(condition=[IS NOT NULL($7)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "avg=2175.; DEPTNO=20\navg=2916.666666; DEPTNO=10\navg=1566.666666; DEPTNO=30\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT AVG(`SAL`) `avg`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` IS NOT NULL\n" + + "GROUP BY `DEPTNO`) `t2`\n" + + "WHERE 
`avg` > 0"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testHaving3() { + String ppl = + "source=EMP | stats avg(SAL) as avg, count() as cnt by DEPTNO | eval new_avg = avg + 1000," + + " new_cnt = cnt + 1 | where new_avg > 1000 or new_cnt > 2"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalFilter(condition=[OR(>($3, 1000), >($4, 2))])\n" + + " LogicalProject(avg=[$1], cnt=[$2], DEPTNO=[$0], new_avg=[+($1, 1000)]," + + " new_cnt=[+($2, 1)])\n" + + " LogicalAggregate(group=[{0}], avg=[AVG($1)], cnt=[COUNT()])\n" + + " LogicalProject(DEPTNO=[$7], SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "avg=2175.; cnt=5; DEPTNO=20; new_avg=3175.; new_cnt=6\n" + + "avg=2916.666666; cnt=3; DEPTNO=10; new_avg=3916.666666; new_cnt=4\n" + + "avg=1566.666666; cnt=6; DEPTNO=30; new_avg=2566.666666; new_cnt=7\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT *\n" + + "FROM (SELECT AVG(`SAL`) `avg`, COUNT(*) `cnt`, `DEPTNO`, AVG(`SAL`) + 1000" + + " `new_avg`, COUNT(*) + 1 `new_cnt`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`) `t1`\n" + + "WHERE `new_avg` > 1000 OR `new_cnt` > 2"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } From e4b5d93050c60df44fe42a93b55a5c23ccfb7e64 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Mon, 8 Dec 2025 12:02:10 -0800 Subject: [PATCH 95/99] Add unified query transpiler API (#4871) * Add basic transpiler impl Signed-off-by: Chen Dai * Add builder Signed-off-by: Chen Dai * Use lombok builder Signed-off-by: Chen Dai * Modify unified query planner UT to extend new test base class Signed-off-by: Chen Dai * Update doc with API design caveat Signed-off-by: Chen Dai * Move opensearch spark sql dialect out of test folder Signed-off-by: Chen Dai * Update doc and test assertion message Signed-off-by: Chen Dai * Fix line separator and license header Signed-off-by: Chen Dai --------- Signed-off-by: Chen Dai --- api/README.md | 56 ++++++++++++++++- api/build.gradle | 5 ++ .../sql/api/EmptyDataSourceService.java | 5 ++ .../transpiler/UnifiedQueryTranspiler.java | 40 +++++++++++++ .../sql/api/UnifiedQueryPlannerTest.java | 52 +++++----------- .../sql/api/UnifiedQueryTestBase.java | 60 +++++++++++++++++++ .../UnifiedQueryTranspilerTest.java | 59 ++++++++++++++++++ .../calcite/OpenSearchSparkSqlDialect.java | 0 8 files changed, 238 insertions(+), 39 deletions(-) create mode 100644 api/src/main/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspiler.java create mode 100644 api/src/test/java/org/opensearch/sql/api/UnifiedQueryTestBase.java create mode 100644 api/src/test/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspilerTest.java rename ppl/src/{test => main}/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java (100%) diff --git a/api/README.md b/api/README.md index 0288b7ad22c..c380a1a7128 100644 --- a/api/README.md +++ b/api/README.md @@ -4,10 +4,21 @@ This module provides a high-level integration layer for the Calcite-based query ## Overview -The `UnifiedQueryPlanner` serves as the primary entry point for external consumers. It accepts PPL (Piped Processing Language) queries and returns Calcite `RelNode` logical plans as intermediate representation. +This module provides two primary components: + +- **`UnifiedQueryPlanner`**: Accepts PPL (Piped Processing Language) queries and returns Calcite `RelNode` logical plans as intermediate representation. 
+- **`UnifiedQueryTranspiler`**: Converts Calcite logical plans (`RelNode`) into SQL strings for various target databases using different SQL dialects. + +Together, these components enable a complete workflow: parse PPL queries into logical plans, then transpile those plans into target database SQL. + +### Experimental API Design + +**This API is currently experimental.** The design intentionally exposes Calcite abstractions (`Schema` for catalogs, `RelNode` as IR, `SqlDialect` for dialects) rather than creating custom wrapper interfaces. This is to avoid overdesign by leveraging the flexible Calcite interface in the short term. If a more abstracted API becomes necessary in the future, breaking changes may be introduced with the new abstraction layer. ## Usage +### UnifiedQueryPlanner + Use the declarative, fluent builder API to initialize the `UnifiedQueryPlanner`. ```java @@ -21,6 +32,49 @@ UnifiedQueryPlanner planner = UnifiedQueryPlanner.builder() RelNode plan = planner.plan("source = opensearch.test"); ``` +### UnifiedQueryTranspiler + +Use `UnifiedQueryTranspiler` to convert Calcite logical plans into SQL strings for target databases. The transpiler supports various SQL dialects through Calcite's `SqlDialect` interface. + +```java +UnifiedQueryTranspiler transpiler = UnifiedQueryTranspiler.builder() + .dialect(SparkSqlDialect.DEFAULT) + .build(); + +String sql = transpiler.toSql(plan); +``` + +### Complete Workflow Example + +Combining both components to transpile PPL queries into target database SQL: + +```java +// Step 1: Initialize planner +UnifiedQueryPlanner planner = UnifiedQueryPlanner.builder() + .language(QueryType.PPL) + .catalog("catalog", schema) + .defaultNamespace("catalog") + .build(); + +// Step 2: Parse PPL query into logical plan +RelNode plan = planner.plan("source = employees | where age > 30"); + +// Step 3: Initialize transpiler with target dialect +UnifiedQueryTranspiler transpiler = UnifiedQueryTranspiler.builder() + .dialect(SparkSqlDialect.DEFAULT) + .build(); + +// Step 4: Transpile to target SQL +String sparkSql = transpiler.toSql(plan); +// Result: SELECT * FROM `catalog`.`employees` WHERE `age` > 30 +``` + +Supported SQL dialects include: +- `SparkSqlDialect.DEFAULT` - Apache Spark SQL +- `PostgresqlSqlDialect.DEFAULT` - PostgreSQL +- `MysqlSqlDialect.DEFAULT` - MySQL +- And other Calcite-supported dialects + ## Development & Testing A set of unit tests is provided to validate planner behavior. 
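The same two-step flow shown in the README works for any dialect Calcite bundles. A minimal sketch of retargeting the identical plan at PostgreSQL — illustration only, assuming the same `schema` object set up in the README examples above:

```java
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.sql.dialect.PostgresqlSqlDialect;
import org.opensearch.sql.api.UnifiedQueryPlanner;
import org.opensearch.sql.api.transpiler.UnifiedQueryTranspiler;
import org.opensearch.sql.executor.QueryType;

// Plan the PPL query once...
UnifiedQueryPlanner planner = UnifiedQueryPlanner.builder()
    .language(QueryType.PPL)
    .catalog("catalog", schema)          // `schema` as defined in the README setup
    .defaultNamespace("catalog")
    .build();
RelNode plan = planner.plan("source = employees | where age > 30");

// ...then transpile the same plan for a different target database.
UnifiedQueryTranspiler transpiler = UnifiedQueryTranspiler.builder()
    .dialect(PostgresqlSqlDialect.DEFAULT)
    .build();
String postgresSql = transpiler.toSql(plan);
// Roughly: SELECT * FROM "catalog"."employees" WHERE "age" > 30
```

Because the transpiler depends only on Calcite's `SqlDialect` interface, swapping the target database is a one-line change to the builder.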
diff --git a/api/build.gradle b/api/build.gradle index dfd0e25b902..5ce00169597 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -5,6 +5,7 @@ plugins { id 'java-library' + id "io.freefair.lombok" id 'jacoco' id 'com.diffplug.spotless' } @@ -25,6 +26,10 @@ spotless { exclude '**/build/**', '**/build-*/**', 'src/main/gen/**' } importOrder() + licenseHeader("/*\n" + + " * Copyright OpenSearch Contributors\n" + + " * SPDX-License-Identifier: Apache-2.0\n" + + " */\n\n") removeUnusedImports() trimTrailingWhitespace() endWithNewline() diff --git a/api/src/main/java/org/opensearch/sql/api/EmptyDataSourceService.java b/api/src/main/java/org/opensearch/sql/api/EmptyDataSourceService.java index 0fa0c38ad3c..0b42279d9db 100644 --- a/api/src/main/java/org/opensearch/sql/api/EmptyDataSourceService.java +++ b/api/src/main/java/org/opensearch/sql/api/EmptyDataSourceService.java @@ -1,3 +1,8 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + package org.opensearch.sql.api; import java.util.Map; diff --git a/api/src/main/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspiler.java b/api/src/main/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspiler.java new file mode 100644 index 00000000000..dc8c131c5e7 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspiler.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.transpiler; + +import lombok.Builder; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.rel2sql.RelToSqlConverter; +import org.apache.calcite.sql.SqlDialect; +import org.apache.calcite.sql.SqlNode; + +/** + * Transpiles Calcite logical plans ({@link RelNode}) into SQL strings for various target databases. + * Uses Calcite's {@link RelToSqlConverter} to perform the conversion, respecting the specified SQL + * dialect. + */ +@Builder +public class UnifiedQueryTranspiler { + + /** Target SQL dialect */ + private final SqlDialect dialect; + + /** + * Converts a Calcite logical plan to a SQL string using the configured target dialect. 
+ * + * @param plan the logical plan to convert (must not be null) + * @return the generated SQL string + */ + public String toSql(RelNode plan) { + try { + RelToSqlConverter converter = new RelToSqlConverter(dialect); + SqlNode sqlNode = converter.visitRoot(plan).asStatement(); + return sqlNode.toSqlString(dialect).getSql(); + } catch (Exception e) { + throw new IllegalStateException("Failed to transpile logical plan to SQL", e); + } + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java index 0f7754ba501..754e36c092e 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java @@ -8,41 +8,15 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThrows; -import java.util.List; import java.util.Map; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.schema.Schema; -import org.apache.calcite.schema.Table; import org.apache.calcite.schema.impl.AbstractSchema; -import org.apache.calcite.schema.impl.AbstractTable; -import org.apache.calcite.sql.type.SqlTypeName; import org.junit.Test; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.executor.QueryType; -public class UnifiedQueryPlannerTest { - - /** Test schema consists of a test table with id and name columns */ - private final AbstractSchema testSchema = - new AbstractSchema() { - @Override - protected Map getTableMap() { - return Map.of( - "index", - new AbstractTable() { - @Override - public RelDataType getRowType(RelDataTypeFactory typeFactory) { - return typeFactory.createStructType( - List.of( - typeFactory.createSqlType(SqlTypeName.INTEGER), - typeFactory.createSqlType(SqlTypeName.VARCHAR)), - List.of("id", "name")); - } - }); - } - }; +public class UnifiedQueryPlannerTest extends UnifiedQueryTestBase { /** Test catalog consists of test schema above */ private final AbstractSchema testDeepSchema = @@ -61,7 +35,7 @@ public void testPPLQueryPlanning() { .catalog("opensearch", testSchema) .build(); - RelNode plan = planner.plan("source = opensearch.index | eval f = abs(id)"); + RelNode plan = planner.plan("source = opensearch.employees | eval f = abs(id)"); assertNotNull("Plan should be created", plan); } @@ -74,8 +48,8 @@ public void testPPLQueryPlanningWithDefaultNamespace() { .defaultNamespace("opensearch") .build(); - assertNotNull("Plan should be created", planner.plan("source = opensearch.index")); - assertNotNull("Plan should be created", planner.plan("source = index")); + assertNotNull("Plan should be created", planner.plan("source = opensearch.employees")); + assertNotNull("Plan should be created", planner.plan("source = employees")); } @Test @@ -87,12 +61,12 @@ public void testPPLQueryPlanningWithDefaultNamespaceMultiLevel() { .defaultNamespace("catalog.opensearch") .build(); - assertNotNull("Plan should be created", planner.plan("source = catalog.opensearch.index")); - assertNotNull("Plan should be created", planner.plan("source = index")); + assertNotNull("Plan should be created", planner.plan("source = catalog.opensearch.employees")); + assertNotNull("Plan should be created", planner.plan("source = employees")); // This is valid in SparkSQL, but Calcite requires "catalog" as the default root schema to // resolve it - 
assertThrows(IllegalStateException.class, () -> planner.plan("source = opensearch.index")); + assertThrows(IllegalStateException.class, () -> planner.plan("source = opensearch.employees")); } @Test @@ -105,7 +79,8 @@ public void testPPLQueryPlanningWithMultipleCatalogs() { .build(); RelNode plan = - planner.plan("source = catalog1.index | lookup catalog2.index id | eval f = abs(id)"); + planner.plan( + "source = catalog1.employees | lookup catalog2.employees id | eval f = abs(id)"); assertNotNull("Plan should be created with multiple catalogs", plan); } @@ -119,7 +94,8 @@ public void testPPLQueryPlanningWithMultipleCatalogsAndDefaultNamespace() { .defaultNamespace("catalog2") .build(); - RelNode plan = planner.plan("source = catalog1.index | lookup index id | eval f = abs(id)"); + RelNode plan = + planner.plan("source = catalog1.employees | lookup employees id | eval f = abs(id)"); assertNotNull("Plan should be created with multiple catalogs", plan); } @@ -132,7 +108,7 @@ public void testPPLQueryPlanningWithMetadataCaching() { .cacheMetadata(true) .build(); - RelNode plan = planner.plan("source = opensearch.index"); + RelNode plan = planner.plan("source = opensearch.employees"); assertNotNull("Plan should be created", plan); } @@ -166,7 +142,7 @@ public void testUnsupportedStatementType() { .catalog("opensearch", testSchema) .build(); - planner.plan("explain source = index"); // explain statement + planner.plan("explain source = employees"); // explain statement } @Test(expected = SyntaxCheckException.class) @@ -177,6 +153,6 @@ public void testPlanPropagatingSyntaxCheckException() { .catalog("opensearch", testSchema) .build(); - planner.plan("source = index | eval"); // Trigger syntax error from parser + planner.plan("source = employees | eval"); // Trigger syntax error from parser } } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryTestBase.java new file mode 100644 index 00000000000..f63bfed09ec --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryTestBase.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import java.util.List; +import java.util.Map; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.Table; +import org.apache.calcite.schema.impl.AbstractSchema; +import org.apache.calcite.schema.impl.AbstractTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.Before; +import org.opensearch.sql.executor.QueryType; + +/** Base class for unified query tests providing common test schema and utilities. 
*/ +public abstract class UnifiedQueryTestBase { + + /** Test schema containing sample tables for testing */ + protected AbstractSchema testSchema; + + /** Unified query planner configured with test schema */ + protected UnifiedQueryPlanner planner; + + @Before + public void setUp() { + testSchema = + new AbstractSchema() { + @Override + protected Map getTableMap() { + return Map.of("employees", createEmployeesTable()); + } + }; + + planner = + UnifiedQueryPlanner.builder() + .language(QueryType.PPL) + .catalog("catalog", testSchema) + .defaultNamespace("catalog") + .build(); + } + + protected Table createEmployeesTable() { + return new AbstractTable() { + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return typeFactory.createStructType( + List.of( + typeFactory.createSqlType(SqlTypeName.INTEGER), + typeFactory.createSqlType(SqlTypeName.VARCHAR), + typeFactory.createSqlType(SqlTypeName.INTEGER), + typeFactory.createSqlType(SqlTypeName.VARCHAR)), + List.of("id", "name", "age", "department")); + } + }; + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspilerTest.java b/api/src/test/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspilerTest.java new file mode 100644 index 00000000000..f0ad4133c92 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspilerTest.java @@ -0,0 +1,59 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.transpiler; + +import static org.junit.Assert.assertEquals; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.sql.dialect.SparkSqlDialect; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.ppl.calcite.OpenSearchSparkSqlDialect; + +public class UnifiedQueryTranspilerTest extends UnifiedQueryTestBase { + + private UnifiedQueryTranspiler transpiler; + + @Before + public void setUp() { + super.setUp(); + transpiler = UnifiedQueryTranspiler.builder().dialect(SparkSqlDialect.DEFAULT).build(); + } + + @Test + public void testToSql() { + String pplQuery = "source = employees"; + RelNode plan = planner.plan(pplQuery); + + String actualSql = transpiler.toSql(plan); + String expectedSql = normalize("SELECT *\nFROM `catalog`.`employees`"); + assertEquals( + "Transpiled SQL using SparkSqlDialect should match expected SQL", expectedSql, actualSql); + } + + @Test + public void testToSqlWithCustomDialect() { + String pplQuery = "source = employees | where name = 123"; + RelNode plan = planner.plan(pplQuery); + + UnifiedQueryTranspiler customTranspiler = + UnifiedQueryTranspiler.builder().dialect(OpenSearchSparkSqlDialect.DEFAULT).build(); + String actualSql = customTranspiler.toSql(plan); + String expectedSql = + normalize( + "SELECT *\nFROM `catalog`.`employees`\nWHERE TRY_CAST(`name` AS DOUBLE) = 1.230E2"); + assertEquals( + "Transpiled query using OpenSearchSparkSqlDialect should translate SAFE_CAST to TRY_CAST", + expectedSql, + actualSql); + } + + /** Normalizes line endings to platform-specific format for cross-platform test compatibility. 
*/ + private String normalize(String sql) { + return sql.replace("\n", System.lineSeparator()); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java b/ppl/src/main/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java similarity index 100% rename from ppl/src/test/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java rename to ppl/src/main/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java From 01527f04e90abc9936b22e9eb1684ff7790c904b Mon Sep 17 00:00:00 2001 From: Kai Huang <105710027+ahkcs@users.noreply.github.com> Date: Mon, 8 Dec 2025 13:47:56 -0800 Subject: [PATCH 96/99] Support `split` eval function (#4814) * Support split eval function Signed-off-by: Kai Huang # Conflicts: # core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java # ppl/src/main/antlr/OpenSearchPPLLexer.g4 # ppl/src/main/antlr/OpenSearchPPLParser.g4 # ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java # ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java * doctest Signed-off-by: Kai Huang * Update test cases Signed-off-by: Kai Huang * Update to not use UDF Signed-off-by: Kai Huang --------- Signed-off-by: Kai Huang --- .../function/BuiltinFunctionName.java | 1 + .../expression/function/PPLFuncImpTable.java | 29 +++++++ docs/user/ppl/functions/collection.rst | 54 +++++++++++++ .../remote/CalciteArrayFunctionIT.java | 39 ++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../calcite/CalcitePPLArrayFunctionTest.java | 77 +++++++++++++++++++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 16 ++++ 8 files changed, 218 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 21b938532ba..72be016ecc2 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -75,6 +75,7 @@ public enum BuiltinFunctionName { MVAPPEND(FunctionName.of("mvappend")), MVJOIN(FunctionName.of("mvjoin")), MVINDEX(FunctionName.of("mvindex")), + SPLIT(FunctionName.of("split")), MVDEDUP(FunctionName.of("mvdedup")), FORALL(FunctionName.of("forall")), EXISTS(FunctionName.of("exists")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index aa15c3228f1..d83dcfaeac1 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -195,6 +195,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.SINH; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN_BUCKET; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPLIT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SQRT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_POP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.STDDEV_SAMP; @@ -991,6 +992,34 @@ void 
populate() { builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter), PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER)); + // Register SPLIT with custom logic for empty delimiter + // Case 1: Delimiter is not empty string, use SPLIT + // Case 2: Delimiter is empty string, use REGEXP_EXTRACT_ALL with '.' pattern + register( + SPLIT, + (FunctionImp2) + (builder, str, delimiter) -> { + // Create condition: delimiter = '' + RexNode emptyString = builder.makeLiteral(""); + RexNode isEmptyDelimiter = + builder.makeCall(SqlStdOperatorTable.EQUALS, delimiter, emptyString); + + // For empty delimiter: split into characters using REGEXP_EXTRACT_ALL with '.' + // pattern This matches each individual character + RexNode dotPattern = builder.makeLiteral("."); + RexNode splitChars = + builder.makeCall(SqlLibraryOperators.REGEXP_EXTRACT_ALL, str, dotPattern); + + // For non-empty delimiter: use standard SPLIT + RexNode normalSplit = builder.makeCall(SqlLibraryOperators.SPLIT, str, delimiter); + + // Use CASE to choose between the two approaches + // CASE WHEN isEmptyDelimiter THEN splitChars ELSE normalSplit END + return builder.makeCall( + SqlStdOperatorTable.CASE, isEmptyDelimiter, splitChars, normalSplit); + }, + PPLTypeChecker.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER)); + // Register MVINDEX to use Calcite's ITEM/ARRAY_SLICE with index normalization register( MVINDEX, diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst index 34c02074641..fdea75d3e81 100644 --- a/docs/user/ppl/functions/collection.rst +++ b/docs/user/ppl/functions/collection.rst @@ -186,6 +186,60 @@ Example:: | 120 | +--------+ +SPLIT +----- + +Description +>>>>>>>>>>> + +Usage: split(str, delimiter) splits the string values on the delimiter and returns the string values as a multivalue field (array). Use an empty string ("") to split the original string into one value per character. If the delimiter is not found, returns an array containing the original string. If the input string is empty, returns an empty array. 
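The registration above expands `split` at plan time into a CASE over Calcite's built-in operators rather than introducing a new UDF. As a plain-Java sketch of the documented semantics (`splitSemantics` is a hypothetical helper for illustration, not code from this patch):

```java
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

// Hypothetical reference implementation of the documented split() behavior.
static List<String> splitSemantics(String str, String delimiter) {
  if (str.isEmpty()) {
    return List.of();                                  // empty input -> empty array
  }
  if (delimiter.isEmpty()) {
    // Mirrors REGEXP_EXTRACT_ALL(str, '.'): one element per character.
    return Arrays.asList(str.split(""));
  }
  // Mirrors SPLIT(str, delimiter); when the delimiter is absent the result is [str].
  return Arrays.asList(str.split(Pattern.quote(delimiter), -1));
}
```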
+ +Argument type: str: STRING, delimiter: STRING + +Return type: ARRAY of STRING + +Example:: + + os> source=people | eval test = 'buttercup;rarity;tenderhoof;dash', result = split(test, ';') | fields result | head 1 + fetched rows / total rows = 1/1 + +------------------------------------+ + | result | + |------------------------------------| + | [buttercup,rarity,tenderhoof,dash] | + +------------------------------------+ + + os> source=people | eval test = '1a2b3c4def567890', result = split(test, 'def') | fields result | head 1 + fetched rows / total rows = 1/1 + +------------------+ + | result | + |------------------| + | [1a2b3c4,567890] | + +------------------+ + + os> source=people | eval test = 'abcd', result = split(test, '') | fields result | head 1 + fetched rows / total rows = 1/1 + +-----------+ + | result | + |-----------| + | [a,b,c,d] | + +-----------+ + + os> source=people | eval test = 'name::value', result = split(test, '::') | fields result | head 1 + fetched rows / total rows = 1/1 + +--------------+ + | result | + |--------------| + | [name,value] | + +--------------+ + + os> source=people | eval test = 'hello', result = split(test, ',') | fields result | head 1 + fetched rows / total rows = 1/1 + +---------+ + | result | + |---------| + | [hello] | + +---------+ + MVJOIN ------ diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java index 52a6e181e20..31556e518b9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java @@ -567,4 +567,43 @@ public void testMvdedupPreservesOrder() throws IOException { // Should preserve first occurrence order: z, a, b, c verifyDataRows(actual, rows(List.of("z", "a", "b", "c"))); } + + @Test + public void testSplitWithSemicolonDelimiter() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval test = 'buttercup;rarity;tenderhoof;dash;mcintosh', result =" + + " split(test, ';') | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of("buttercup", "rarity", "tenderhoof", "dash", "mcintosh"))); + } + + @Test + public void testSplitWithMultiCharDelimiter() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval test = '1a2b3c4def567890', result = split(test, 'def') | head 1 |" + + " fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + verifyDataRows(actual, rows(List.of("1a2b3c4", "567890"))); + } + + @Test + public void testSplitWithEmptyDelimiter() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval test = 'abcd', result = split(test, '') | head 1 | fields result", + TEST_INDEX_BANK)); + + verifySchema(actual, schema("result", "array")); + // Empty delimiter splits into individual characters + verifyDataRows(actual, rows(List.of("a", "b", "c", "d"))); + } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 2c976f2fcf0..9b8ec9a2069 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -446,6 +446,7 @@ MVAPPEND: 'MVAPPEND'; MVJOIN: 'MVJOIN'; MVINDEX: 'MVINDEX'; MVDEDUP: 'MVDEDUP'; +SPLIT: 'SPLIT'; FORALL: 'FORALL'; FILTER: 'FILTER'; 
TRANSFORM: 'TRANSFORM'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 69fdcafc4c6..cd267bad38d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1098,6 +1098,7 @@ collectionFunctionName | MVJOIN | MVINDEX | MVDEDUP + | SPLIT | FORALL | EXISTS | FILTER diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java index 176fb534f37..96529adea24 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java @@ -290,4 +290,81 @@ public void testMvdedupPreservesOrder() { + "LIMIT 1"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testSplitWithSemicolonDelimiter() { + String ppl = + "source=EMP | eval test = 'buttercup;rarity;tenderhoof', result = split(test, ';') | head" + + " 1 | fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], test=['buttercup;rarity;tenderhoof':VARCHAR]," + + " result=[CASE(=(';', '')," + + " REGEXP_EXTRACT_ALL('buttercup;rarity;tenderhoof':VARCHAR, '.')," + + " SPLIT('buttercup;rarity;tenderhoof':VARCHAR, ';'))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT CASE WHEN ';' = '' THEN REGEXP_EXTRACT_ALL('buttercup;rarity;tenderhoof', " + + "'.') ELSE SPLIT('buttercup;rarity;tenderhoof', ';') END " + + "`result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testSplitWithMultiCharDelimiter() { + String ppl = + "source=EMP | eval test = '1a2b3c4def567890', result = split(test, 'def') | head 1 |" + + " fields result"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], test=['1a2b3c4def567890':VARCHAR]," + + " result=[CASE(=('def':VARCHAR, ''), REGEXP_EXTRACT_ALL('1a2b3c4def567890':VARCHAR," + + " '.'), SPLIT('1a2b3c4def567890':VARCHAR, 'def':VARCHAR))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT CASE WHEN 'def' = '' THEN REGEXP_EXTRACT_ALL('1a2b3c4def567890', " + + "'.') ELSE SPLIT('1a2b3c4def567890', 'def') END `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testSplitWithEmptyDelimiter() { + String ppl = + "source=EMP | eval test = 'abcd', result = split(test, '') | head 1 | fields result"; + RelNode root = getRelNode(ppl); + + // With empty delimiter, should split into individual characters + String expectedLogical = + "LogicalProject(result=[$9])\n" + + " LogicalSort(fetch=[1])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], test=['abcd':VARCHAR]," + + " result=[CASE(=('':VARCHAR, ''), REGEXP_EXTRACT_ALL('abcd':VARCHAR," + + " '.'), SPLIT('abcd':VARCHAR, '':VARCHAR))])\n" + + " 
LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT CASE WHEN '' = '' THEN REGEXP_EXTRACT_ALL('abcd', '.') " + + "ELSE SPLIT('abcd', '') END `result`\n" + + "FROM `scott`.`EMP`\n" + + "LIMIT 1"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index ede8c4e4a5a..3fd4ed3694f 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -845,6 +845,22 @@ public void testMvindex() { anonymize("source=t | eval result=mvindex(array(1, 2, 3, 4, 5), 1, 3) | fields result")); } + @Test + public void testSplit() { + // Test split with delimiter + assertEquals( + "source=table | eval identifier=split(***,***) | fields + identifier", + anonymize("source=t | eval result=split('a;b;c', ';') | fields result")); + // Test split with field reference + assertEquals( + "source=table | eval identifier=split(identifier,***) | fields + identifier", + anonymize("source=t | eval result=split(text, ',') | fields result")); + // Test split with empty delimiter (splits into characters) + assertEquals( + "source=table | eval identifier=split(***,***) | fields + identifier", + anonymize("source=t | eval result=split('abcd', '') | fields result")); + } + @Test public void testMvdedup() { // Test mvdedup with array containing duplicates From 29438266975395378f9798137027119ce317b094 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Mon, 8 Dec 2025 16:02:35 -0800 Subject: [PATCH 97/99] Add workflow for SQL CLI integration tests (#4770) * Add workflow for SQL CLI integration tests Signed-off-by: Simeon Widdis * Switch SQL checkout path for CLI Signed-off-by: Simeon Widdis * Tweak paths and mkdir for remote Signed-off-by: Simeon Widdis * Fix filename Signed-off-by: Simeon Widdis * Remove redundant verification Signed-off-by: Simeon Widdis * Don't pull after clone Signed-off-by: Simeon Widdis --------- Signed-off-by: Simeon Widdis --- .../workflows/sql-cli-integration-test.yml | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 .github/workflows/sql-cli-integration-test.yml diff --git a/.github/workflows/sql-cli-integration-test.yml b/.github/workflows/sql-cli-integration-test.yml new file mode 100644 index 00000000000..63f3e91d334 --- /dev/null +++ b/.github/workflows/sql-cli-integration-test.yml @@ -0,0 +1,96 @@ +name: SQL CLI Integration Test + +# This workflow tests sql-cli against the current SQL changes +# to catch breaking changes before they're published + +on: + pull_request: + paths: + - '**/*.java' + - '**/*.g4' + - '!sql-jdbc/**' + - '**gradle*' + - '**lombok*' + - 'integ-test/**' + - '**/*.jar' + - '**/*.pom' + - '.github/workflows/sql-cli-integration-test.yml' + push: + branches: + - main + - '[0-9]+.[0-9]+' + - '[0-9]+.x' + paths: + - '**/*.java' + - '**/*.g4' + - '!sql-jdbc/**' + - '**gradle*' + - '**lombok*' + - 'integ-test/**' + - '**/*.jar' + - '**/*.pom' + - '.github/workflows/sql-cli-integration-test.yml' + workflow_dispatch: + +jobs: + test-sql-cli-integration: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + java: [21] + + steps: + - name: Checkout SQL CLI repository (latest main) + uses: actions/checkout@v4 + with: + repository: opensearch-project/sql-cli + path: sql-cli + ref: main + + - name: 
Make a directory for the SQL repo + working-directory: sql-cli + run: mkdir remote + + - name: Checkout SQL repository (current changes) + uses: actions/checkout@v4 + with: + path: sql-cli/remote/sql + + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + + - name: Build and publish SQL modules to Maven Local + working-directory: sql-cli/remote/sql + run: | + echo "Building SQL modules from current branch..." + ./gradlew publishToMavenLocal -x test -x integTest + echo "SQL modules published to Maven Local" + + - name: Run SQL CLI tests with local SQL modules + working-directory: sql-cli + run: | + echo "Running SQL CLI tests against local SQL modules..." + ./gradlew test -PuseLocalSql=true -PskipSqlRepoPull=true + + - name: Upload SQL CLI test reports + if: always() + uses: actions/upload-artifact@v4 + continue-on-error: true + with: + name: sql-cli-test-reports-java-${{ matrix.java }} + path: | + sql-cli/build/reports/** + sql-cli/build/test-results/** + + - name: Test Summary + if: always() + run: | + echo "## SQL CLI Integration Test Results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Tested SQL CLI against SQL changes from: \`${{ github.ref }}\`" >> $GITHUB_STEP_SUMMARY + echo "SQL CLI version: main branch (latest)" >> $GITHUB_STEP_SUMMARY + echo "Java version: ${{ matrix.java }}" >> $GITHUB_STEP_SUMMARY From 3ffd8ebff0510dfafc5a7be3a71a11ac7b6dcdbe Mon Sep 17 00:00:00 2001 From: Kyle Hounslow <7102778+kylehounslow@users.noreply.github.com> Date: Tue, 9 Dec 2025 13:52:42 -0800 Subject: [PATCH 98/99] Migrate PPL Documentation from RST to Markdown (#4912) --- DEVELOPER_GUIDE.rst | 2 +- README.md | 2 +- docs/category.json | 109 +- docs/dev/intro-v3-engine.md | 6 +- docs/dev/ppl-commands.md | 2 +- docs/dev/testing-doctest.md | 47 + .../admin/connectors/prometheus_connector.md | 326 ++ .../admin/connectors/prometheus_connector.rst | 279 -- .../ppl/admin/connectors/s3glue_connector.md | 77 + .../ppl/admin/connectors/s3glue_connector.rst | 92 - .../connectors/security_lake_connector.md | 63 + .../connectors/security_lake_connector.rst | 78 - docs/user/ppl/admin/cross_cluster_search.md | 89 + docs/user/ppl/admin/cross_cluster_search.rst | 96 - docs/user/ppl/admin/datasources.md | 304 ++ docs/user/ppl/admin/datasources.rst | 290 -- docs/user/ppl/admin/monitoring.md | 35 + docs/user/ppl/admin/monitoring.rst | 56 - docs/user/ppl/admin/security.md | 65 + docs/user/ppl/admin/security.rst | 70 - docs/user/ppl/admin/settings.md | 441 +++ docs/user/ppl/admin/settings.rst | 427 --- docs/user/ppl/cmd/ad.md | 124 + docs/user/ppl/cmd/ad.rst | 112 - docs/user/ppl/cmd/append.md | 63 + docs/user/ppl/cmd/append.rst | 66 - docs/user/ppl/cmd/appendcol.md | 126 + docs/user/ppl/cmd/appendcol.rst | 110 - docs/user/ppl/cmd/appendpipe.md | 70 + docs/user/ppl/cmd/appendpipe.rst | 68 - docs/user/ppl/cmd/bin.md | 469 +++ docs/user/ppl/cmd/bin.rst | 348 --- docs/user/ppl/cmd/chart.md | 200 ++ docs/user/ppl/cmd/chart.rst | 193 -- docs/user/ppl/cmd/dedup.md | 134 + docs/user/ppl/cmd/dedup.rst | 111 - docs/user/ppl/cmd/describe.md | 67 + docs/user/ppl/cmd/describe.rst | 70 - docs/user/ppl/cmd/eval.md | 132 + docs/user/ppl/cmd/eval.rst | 120 - docs/user/ppl/cmd/eventstats.md | 166 + docs/user/ppl/cmd/eventstats.rst | 162 - docs/user/ppl/cmd/expand.md | 50 + docs/user/ppl/cmd/expand.rst | 61 - docs/user/ppl/cmd/explain.md | 181 ++ docs/user/ppl/cmd/explain.rst | 190 -- docs/user/ppl/cmd/fields.md | 244 ++ 
docs/user/ppl/cmd/fields.rst | 206 -- docs/user/ppl/cmd/fillnull.md | 176 ++ docs/user/ppl/cmd/fillnull.rst | 156 - docs/user/ppl/cmd/flatten.md | 93 + docs/user/ppl/cmd/flatten.rst | 101 - docs/user/ppl/cmd/grok.md | 86 + docs/user/ppl/cmd/grok.rst | 81 - docs/user/ppl/cmd/head.md | 84 + docs/user/ppl/cmd/head.rst | 77 - docs/user/ppl/cmd/join.md | 214 ++ docs/user/ppl/cmd/join.rst | 198 -- docs/user/ppl/cmd/kmeans.md | 37 + docs/user/ppl/cmd/kmeans.rst | 44 - docs/user/ppl/cmd/lookup.md | 339 ++ docs/user/ppl/cmd/lookup.rst | 350 --- docs/user/ppl/cmd/ml.md | 153 + docs/user/ppl/cmd/ml.rst | 138 - docs/user/ppl/cmd/multisearch.md | 152 + docs/user/ppl/cmd/multisearch.rst | 126 - docs/user/ppl/cmd/parse.md | 133 + docs/user/ppl/cmd/parse.rst | 119 - docs/user/ppl/cmd/patterns.md | 260 ++ docs/user/ppl/cmd/patterns.rst | 225 -- docs/user/ppl/cmd/rare.md | 146 + docs/user/ppl/cmd/rare.rst | 132 - docs/user/ppl/cmd/regex.md | 155 + docs/user/ppl/cmd/regex.rst | 140 - docs/user/ppl/cmd/rename.md | 142 + docs/user/ppl/cmd/rename.rst | 130 - docs/user/ppl/cmd/replace.md | 330 ++ docs/user/ppl/cmd/replace.rst | 268 -- docs/user/ppl/cmd/reverse.md | 134 + docs/user/ppl/cmd/reverse.rst | 115 - docs/user/ppl/cmd/rex.md | 291 ++ docs/user/ppl/cmd/rex.rst | 235 -- docs/user/ppl/cmd/search.md | 745 +++++ docs/user/ppl/cmd/search.rst | 556 ---- docs/user/ppl/cmd/showdatasources.md | 32 + docs/user/ppl/cmd/showdatasources.rst | 38 - docs/user/ppl/cmd/sort.md | 256 ++ docs/user/ppl/cmd/sort.rst | 208 -- docs/user/ppl/cmd/spath.md | 110 + docs/user/ppl/cmd/spath.rst | 92 - docs/user/ppl/cmd/stats.md | 487 +++ docs/user/ppl/cmd/stats.rst | 409 --- docs/user/ppl/cmd/streamstats.md | 281 ++ docs/user/ppl/cmd/streamstats.rst | 273 -- docs/user/ppl/cmd/subquery.md | 197 ++ docs/user/ppl/cmd/subquery.rst | 206 -- docs/user/ppl/cmd/syntax.md | 18 + docs/user/ppl/cmd/syntax.rst | 30 - docs/user/ppl/cmd/table.md | 37 + docs/user/ppl/cmd/table.rst | 44 - docs/user/ppl/cmd/timechart.md | 375 +++ docs/user/ppl/cmd/timechart.rst | 351 --- docs/user/ppl/cmd/top.md | 164 + docs/user/ppl/cmd/top.rst | 145 - docs/user/ppl/cmd/trendline.md | 114 + docs/user/ppl/cmd/trendline.rst | 103 - docs/user/ppl/cmd/where.md | 207 ++ docs/user/ppl/cmd/where.rst | 165 - docs/user/ppl/functions/aggregations.md | 653 ++++ docs/user/ppl/functions/aggregations.rst | 522 ---- docs/user/ppl/functions/collection.md | 727 +++++ docs/user/ppl/functions/collection.rst | 450 --- docs/user/ppl/functions/condition.md | 803 +++++ docs/user/ppl/functions/condition.rst | 615 ---- docs/user/ppl/functions/conversion.md | 272 ++ docs/user/ppl/functions/cryptographic.md | 101 + docs/user/ppl/functions/cryptographic.rst | 90 - docs/user/ppl/functions/datetime.md | 2782 +++++++++++++++++ docs/user/ppl/functions/datetime.rst | 2360 -------------- docs/user/ppl/functions/expressions.md | 185 ++ docs/user/ppl/functions/expressions.rst | 177 -- docs/user/ppl/functions/ip.md | 61 + docs/user/ppl/functions/ip.rst | 69 - docs/user/ppl/functions/json.md | 502 +++ docs/user/ppl/functions/json.rst | 363 --- docs/user/ppl/functions/math.md | 1187 +++++++ docs/user/ppl/functions/math.rst | 1045 ------- docs/user/ppl/functions/relevance.md | 505 +++ docs/user/ppl/functions/relevance.rst | 424 --- docs/user/ppl/functions/statistical.md | 142 + docs/user/ppl/functions/statistical.rst | 109 - docs/user/ppl/functions/string.md | 549 ++++ docs/user/ppl/functions/string.rst | 479 --- docs/user/ppl/functions/system.md | 29 + docs/user/ppl/functions/system.rst | 31 - 
docs/user/ppl/general/comments.md | 49 + docs/user/ppl/general/comments.rst | 44 - docs/user/ppl/general/datatypes.md | 327 ++ docs/user/ppl/general/datatypes.rst | 392 --- docs/user/ppl/general/identifiers.md | 188 ++ docs/user/ppl/general/identifiers.rst | 188 -- docs/user/ppl/index.md | 100 + docs/user/ppl/index.rst | 137 - docs/user/ppl/interfaces/endpoint.md | 154 + docs/user/ppl/interfaces/endpoint.rst | 150 - docs/user/ppl/interfaces/protocol.md | 130 + docs/user/ppl/interfaces/protocol.rst | 137 - docs/user/ppl/limitations/limitations.md | 89 + docs/user/ppl/limitations/limitations.rst | 132 - .../reference/splunk_to_ppl_cheat_sheet.md | 193 +- doctest/markdown_parser.py | 286 ++ doctest/test_docs.py | 227 +- .../storage/PrometheusStorageFactoryTest.java | 4 +- scripts/docs_exporter/convert_rst_to_md.py | 536 ++++ .../docs_exporter/export_to_docs_website.py | 104 + .../docs_exporter/fix_markdown_formatting.py | 161 + 156 files changed, 20383 insertions(+), 16579 deletions(-) create mode 100644 docs/user/ppl/admin/connectors/prometheus_connector.md delete mode 100644 docs/user/ppl/admin/connectors/prometheus_connector.rst create mode 100644 docs/user/ppl/admin/connectors/s3glue_connector.md delete mode 100644 docs/user/ppl/admin/connectors/s3glue_connector.rst create mode 100644 docs/user/ppl/admin/connectors/security_lake_connector.md delete mode 100644 docs/user/ppl/admin/connectors/security_lake_connector.rst create mode 100644 docs/user/ppl/admin/cross_cluster_search.md delete mode 100644 docs/user/ppl/admin/cross_cluster_search.rst create mode 100644 docs/user/ppl/admin/datasources.md delete mode 100644 docs/user/ppl/admin/datasources.rst create mode 100644 docs/user/ppl/admin/monitoring.md delete mode 100644 docs/user/ppl/admin/monitoring.rst create mode 100644 docs/user/ppl/admin/security.md delete mode 100644 docs/user/ppl/admin/security.rst create mode 100644 docs/user/ppl/admin/settings.md delete mode 100644 docs/user/ppl/admin/settings.rst create mode 100644 docs/user/ppl/cmd/ad.md delete mode 100644 docs/user/ppl/cmd/ad.rst create mode 100644 docs/user/ppl/cmd/append.md delete mode 100644 docs/user/ppl/cmd/append.rst create mode 100644 docs/user/ppl/cmd/appendcol.md delete mode 100644 docs/user/ppl/cmd/appendcol.rst create mode 100644 docs/user/ppl/cmd/appendpipe.md delete mode 100644 docs/user/ppl/cmd/appendpipe.rst create mode 100644 docs/user/ppl/cmd/bin.md delete mode 100644 docs/user/ppl/cmd/bin.rst create mode 100644 docs/user/ppl/cmd/chart.md delete mode 100644 docs/user/ppl/cmd/chart.rst create mode 100644 docs/user/ppl/cmd/dedup.md delete mode 100644 docs/user/ppl/cmd/dedup.rst create mode 100644 docs/user/ppl/cmd/describe.md delete mode 100644 docs/user/ppl/cmd/describe.rst create mode 100644 docs/user/ppl/cmd/eval.md delete mode 100644 docs/user/ppl/cmd/eval.rst create mode 100644 docs/user/ppl/cmd/eventstats.md delete mode 100644 docs/user/ppl/cmd/eventstats.rst create mode 100644 docs/user/ppl/cmd/expand.md delete mode 100644 docs/user/ppl/cmd/expand.rst create mode 100644 docs/user/ppl/cmd/explain.md delete mode 100644 docs/user/ppl/cmd/explain.rst create mode 100644 docs/user/ppl/cmd/fields.md delete mode 100644 docs/user/ppl/cmd/fields.rst create mode 100644 docs/user/ppl/cmd/fillnull.md delete mode 100644 docs/user/ppl/cmd/fillnull.rst create mode 100644 docs/user/ppl/cmd/flatten.md delete mode 100644 docs/user/ppl/cmd/flatten.rst create mode 100644 docs/user/ppl/cmd/grok.md delete mode 100644 docs/user/ppl/cmd/grok.rst create mode 100644 
docs/user/ppl/cmd/head.md delete mode 100644 docs/user/ppl/cmd/head.rst create mode 100644 docs/user/ppl/cmd/join.md delete mode 100644 docs/user/ppl/cmd/join.rst create mode 100644 docs/user/ppl/cmd/kmeans.md delete mode 100644 docs/user/ppl/cmd/kmeans.rst create mode 100644 docs/user/ppl/cmd/lookup.md delete mode 100644 docs/user/ppl/cmd/lookup.rst create mode 100644 docs/user/ppl/cmd/ml.md delete mode 100644 docs/user/ppl/cmd/ml.rst create mode 100644 docs/user/ppl/cmd/multisearch.md delete mode 100644 docs/user/ppl/cmd/multisearch.rst create mode 100644 docs/user/ppl/cmd/parse.md delete mode 100644 docs/user/ppl/cmd/parse.rst create mode 100644 docs/user/ppl/cmd/patterns.md delete mode 100644 docs/user/ppl/cmd/patterns.rst create mode 100644 docs/user/ppl/cmd/rare.md delete mode 100644 docs/user/ppl/cmd/rare.rst create mode 100644 docs/user/ppl/cmd/regex.md delete mode 100644 docs/user/ppl/cmd/regex.rst create mode 100644 docs/user/ppl/cmd/rename.md delete mode 100644 docs/user/ppl/cmd/rename.rst create mode 100644 docs/user/ppl/cmd/replace.md delete mode 100644 docs/user/ppl/cmd/replace.rst create mode 100644 docs/user/ppl/cmd/reverse.md delete mode 100644 docs/user/ppl/cmd/reverse.rst create mode 100644 docs/user/ppl/cmd/rex.md delete mode 100644 docs/user/ppl/cmd/rex.rst create mode 100644 docs/user/ppl/cmd/search.md delete mode 100644 docs/user/ppl/cmd/search.rst create mode 100644 docs/user/ppl/cmd/showdatasources.md delete mode 100644 docs/user/ppl/cmd/showdatasources.rst create mode 100644 docs/user/ppl/cmd/sort.md delete mode 100644 docs/user/ppl/cmd/sort.rst create mode 100644 docs/user/ppl/cmd/spath.md delete mode 100644 docs/user/ppl/cmd/spath.rst create mode 100644 docs/user/ppl/cmd/stats.md delete mode 100644 docs/user/ppl/cmd/stats.rst create mode 100644 docs/user/ppl/cmd/streamstats.md delete mode 100644 docs/user/ppl/cmd/streamstats.rst create mode 100644 docs/user/ppl/cmd/subquery.md delete mode 100644 docs/user/ppl/cmd/subquery.rst create mode 100644 docs/user/ppl/cmd/syntax.md delete mode 100644 docs/user/ppl/cmd/syntax.rst create mode 100644 docs/user/ppl/cmd/table.md delete mode 100644 docs/user/ppl/cmd/table.rst create mode 100644 docs/user/ppl/cmd/timechart.md delete mode 100644 docs/user/ppl/cmd/timechart.rst create mode 100644 docs/user/ppl/cmd/top.md delete mode 100644 docs/user/ppl/cmd/top.rst create mode 100644 docs/user/ppl/cmd/trendline.md delete mode 100644 docs/user/ppl/cmd/trendline.rst create mode 100644 docs/user/ppl/cmd/where.md delete mode 100644 docs/user/ppl/cmd/where.rst create mode 100644 docs/user/ppl/functions/aggregations.md delete mode 100644 docs/user/ppl/functions/aggregations.rst create mode 100644 docs/user/ppl/functions/collection.md delete mode 100644 docs/user/ppl/functions/collection.rst create mode 100644 docs/user/ppl/functions/condition.md delete mode 100644 docs/user/ppl/functions/condition.rst create mode 100644 docs/user/ppl/functions/conversion.md create mode 100644 docs/user/ppl/functions/cryptographic.md delete mode 100644 docs/user/ppl/functions/cryptographic.rst create mode 100644 docs/user/ppl/functions/datetime.md delete mode 100644 docs/user/ppl/functions/datetime.rst create mode 100644 docs/user/ppl/functions/expressions.md delete mode 100644 docs/user/ppl/functions/expressions.rst create mode 100644 docs/user/ppl/functions/ip.md delete mode 100644 docs/user/ppl/functions/ip.rst create mode 100644 docs/user/ppl/functions/json.md delete mode 100644 docs/user/ppl/functions/json.rst create mode 100644 
docs/user/ppl/functions/math.md delete mode 100644 docs/user/ppl/functions/math.rst create mode 100644 docs/user/ppl/functions/relevance.md delete mode 100644 docs/user/ppl/functions/relevance.rst create mode 100644 docs/user/ppl/functions/statistical.md delete mode 100644 docs/user/ppl/functions/statistical.rst create mode 100644 docs/user/ppl/functions/string.md delete mode 100644 docs/user/ppl/functions/string.rst create mode 100644 docs/user/ppl/functions/system.md delete mode 100644 docs/user/ppl/functions/system.rst create mode 100644 docs/user/ppl/general/comments.md delete mode 100644 docs/user/ppl/general/comments.rst create mode 100644 docs/user/ppl/general/datatypes.md delete mode 100644 docs/user/ppl/general/datatypes.rst create mode 100644 docs/user/ppl/general/identifiers.md delete mode 100644 docs/user/ppl/general/identifiers.rst create mode 100644 docs/user/ppl/index.md delete mode 100644 docs/user/ppl/index.rst create mode 100644 docs/user/ppl/interfaces/endpoint.md delete mode 100644 docs/user/ppl/interfaces/endpoint.rst create mode 100644 docs/user/ppl/interfaces/protocol.md delete mode 100644 docs/user/ppl/interfaces/protocol.rst create mode 100644 docs/user/ppl/limitations/limitations.md delete mode 100644 docs/user/ppl/limitations/limitations.rst create mode 100644 doctest/markdown_parser.py create mode 100644 scripts/docs_exporter/convert_rst_to_md.py create mode 100755 scripts/docs_exporter/export_to_docs_website.py create mode 100755 scripts/docs_exporter/fix_markdown_formatting.py diff --git a/DEVELOPER_GUIDE.rst b/DEVELOPER_GUIDE.rst index 92304c51606..a179b1fc64d 100644 --- a/DEVELOPER_GUIDE.rst +++ b/DEVELOPER_GUIDE.rst @@ -172,7 +172,7 @@ Here are other files and sub-folders that you are likely to touch: - ``build.gradle``: Gradle build script. - ``docs``: documentation for developers and reference manual for users. -- ``doc-test``: code that run .rst docs in ``docs`` folder by Python doctest library. +- ``doctest``: code that runs .rst and .md docs in the ``docs`` folder using the Python doctest library. Note that other related project code has already merged into this single repository together: diff --git a/README.md b/README.md index d139da3430e..4a7e1e5ec9e 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ Recently we have been actively improving our query engine primarily for better c ## Documentation -Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Piped Processing Language (PPL) Reference Manual](./docs/user/ppl/index.rst), [OpenSearch SQL/PPL Engine Development Manual](./docs/dev/index.md) and [Technical Documentation](https://opensearch.org/docs/latest/search-plugins/sql/index/) for detailed information on installing and configuring plugin. +Please refer to the [SQL Language Reference Manual](./docs/user/index.rst), [Piped Processing Language (PPL) Reference Manual](./docs/user/ppl/index.md), [OpenSearch SQL/PPL Engine Development Manual](./docs/dev/index.md) and [Technical Documentation](https://opensearch.org/docs/latest/search-plugins/sql/index/) for detailed information on installing and configuring the plugin.
## Forum diff --git a/docs/category.json b/docs/category.json index f3fe70ecfa5..bf1f9b1d22d 100644 --- a/docs/category.json +++ b/docs/category.json @@ -4,8 +4,61 @@ "user/admin/settings.rst" ], "bash_calcite": [ - "user/ppl/interfaces/endpoint.rst", - "user/ppl/interfaces/protocol.rst" + "user/ppl/interfaces/endpoint.md", + "user/ppl/interfaces/protocol.md" + ], + "ppl_cli_calcite": [ + "user/ppl/cmd/ad.md", + "user/ppl/cmd/append.md", + "user/ppl/cmd/bin.md", + "user/ppl/cmd/dedup.md", + "user/ppl/cmd/describe.md", + "user/ppl/cmd/eventstats.md", + "user/ppl/cmd/eval.md", + "user/ppl/cmd/fields.md", + "user/ppl/cmd/fillnull.md", + "user/ppl/cmd/grok.md", + "user/ppl/cmd/head.md", + "user/ppl/cmd/join.md", + "user/ppl/cmd/lookup.md", + "user/ppl/cmd/parse.md", + "user/ppl/cmd/patterns.md", + "user/ppl/cmd/rare.md", + "user/ppl/cmd/regex.md", + "user/ppl/cmd/rename.md", + "user/ppl/cmd/multisearch.md", + "user/ppl/cmd/replace.md", + "user/ppl/cmd/rex.md", + "user/ppl/cmd/search.md", + "user/ppl/cmd/showdatasources.md", + "user/ppl/cmd/sort.md", + "user/ppl/cmd/spath.md", + "user/ppl/cmd/stats.md", + "user/ppl/cmd/streamstats.md", + "user/ppl/cmd/subquery.md", + "user/ppl/cmd/syntax.md", + "user/ppl/cmd/chart.md", + "user/ppl/cmd/timechart.md", + "user/ppl/cmd/top.md", + "user/ppl/cmd/trendline.md", + "user/ppl/cmd/where.md", + "user/ppl/functions/aggregations.md", + "user/ppl/functions/collection.md", + "user/ppl/functions/condition.md", + "user/ppl/functions/conversion.md", + "user/ppl/functions/cryptographic.md", + "user/ppl/functions/datetime.md", + "user/ppl/functions/expressions.md", + "user/ppl/functions/ip.md", + "user/ppl/functions/json.md", + "user/ppl/functions/math.md", + "user/ppl/functions/relevance.md", + "user/ppl/functions/statistical.md", + "user/ppl/functions/string.md", + "user/ppl/functions/system.md", + "user/ppl/general/comments.md", + "user/ppl/general/datatypes.md", + "user/ppl/general/identifiers.md" ], "sql_cli": [ "user/dql/expressions.rst", @@ -21,57 +74,7 @@ "user/dql/complex.rst", "user/dql/metadata.rst" ], - "ppl_cli_calcite": [ - "user/ppl/cmd/ad.rst", - "user/ppl/cmd/append.rst", - "user/ppl/cmd/bin.rst", - "user/ppl/cmd/dedup.rst", - "user/ppl/cmd/describe.rst", - "user/ppl/cmd/eventstats.rst", - "user/ppl/cmd/eval.rst", - "user/ppl/cmd/fields.rst", - "user/ppl/cmd/fillnull.rst", - "user/ppl/cmd/grok.rst", - "user/ppl/cmd/head.rst", - "user/ppl/cmd/join.rst", - "user/ppl/cmd/lookup.rst", - "user/ppl/cmd/parse.rst", - "user/ppl/cmd/patterns.rst", - "user/ppl/cmd/rare.rst", - "user/ppl/cmd/regex.rst", - "user/ppl/cmd/rename.rst", - "user/ppl/cmd/multisearch.rst", - "user/ppl/cmd/replace.rst", - "user/ppl/cmd/rex.rst", - "user/ppl/cmd/search.rst", - "user/ppl/cmd/showdatasources.rst", - "user/ppl/cmd/sort.rst", - "user/ppl/cmd/spath.rst", - "user/ppl/cmd/stats.rst", - "user/ppl/cmd/streamstats.rst", - "user/ppl/cmd/subquery.rst", - "user/ppl/cmd/syntax.rst", - "user/ppl/cmd/chart.rst", - "user/ppl/cmd/timechart.rst", - "user/ppl/cmd/search.rst", - "user/ppl/functions/statistical.rst", - "user/ppl/cmd/top.rst", - "user/ppl/cmd/trendline.rst", - "user/ppl/cmd/where.rst", - "user/ppl/functions/collection.rst", - "user/ppl/functions/condition.rst", - "user/ppl/functions/datetime.rst", - "user/ppl/functions/expressions.rst", - "user/ppl/functions/ip.rst", - "user/ppl/functions/json.rst", - "user/ppl/functions/math.rst", - "user/ppl/functions/relevance.rst", - "user/ppl/functions/string.rst", - "user/ppl/functions/conversion.rst", - 
"user/ppl/general/datatypes.rst", - "user/ppl/general/identifiers.rst" - ], "bash_settings": [ - "user/ppl/admin/settings.rst" + "user/ppl/admin/settings.md" ] } diff --git a/docs/dev/intro-v3-engine.md b/docs/dev/intro-v3-engine.md index 43b90bda204..fd73cd5c8e1 100644 --- a/docs/dev/intro-v3-engine.md +++ b/docs/dev/intro-v3-engine.md @@ -26,9 +26,9 @@ Find more details in [V3 Architecture](./intro-v3-architecture.md). In the initial release of the V3 engine (3.0.0), the main new features focus on enhancing the PPL language while maintaining maximum compatibility with V2 behavior. -* **[Join](../user/ppl/cmd/join.rst) Command** -* **[Lookup](../user/ppl/cmd/lookup.rst) Command** -* **[Subquery](../user/ppl/cmd/subquery.rst) Command** +* **[Join](../user/ppl/cmd/join.md) Command** +* **[Lookup](../user/ppl/cmd/lookup.md) Command** +* **[Subquery](../user/ppl/cmd/subquery.md) Command** V3 (Calcite integration) engine is enabled by default in 3.3.0. diff --git a/docs/dev/ppl-commands.md b/docs/dev/ppl-commands.md index 9d62e607f86..ea727e234a5 100644 --- a/docs/dev/ppl-commands.md +++ b/docs/dev/ppl-commands.md @@ -54,4 +54,4 @@ If you are working on contributing a new PPL command, please read this guide and - Add a test in `CrossClusterSearchIT` - [ ] **User doc:** - - Add a xxx.rst under `docs/user/ppl/cmd` and link the new doc to `docs/user/ppl/index.rst` + - Add a xxx.md under `docs/user/ppl/cmd` and link the new doc to `docs/user/ppl/index.md` diff --git a/docs/dev/testing-doctest.md b/docs/dev/testing-doctest.md index 1a966ba50c3..55d73d9f02a 100644 --- a/docs/dev/testing-doctest.md +++ b/docs/dev/testing-doctest.md @@ -57,11 +57,58 @@ Doctest runs with project build by `./gradlew build`. You can also only run doct Make sure you don't have any OpenSearch instance running at `http://localhost:9200` ### 1.4.2 How to write documentation with doctest? + +#### RST Format (SQL docs only. On Deprecation path. Use markdown for PPL) 1. If you want to add a new doc, you can add it to `docs` folder, under correct sub-folder, in `.rst` format. > **Attention**: For code examples in documentation, a Mixing usage of `cli` and `bash` in one doc is not supported yet. 2. Add your new doc file path to `docs/category.json` by its category 3. Run doctest `./gradlew doctest` (optionally with `-DignorePrometheus`) to see if your tests can pass +#### Markdown Format (New - Currently for docs/user/ppl only) +For PPL documentation, Markdown format is now supported with the following guidelines: + +1. **File Format**: Create `.md` file(s) in `docs/user/ppl` folder +2. **Category Configuration**: Add markdown files to markdown-only categories in `docs/category.json`: + - `ppl_cli_calcite`: PPL CLI examples with Calcite engine + - `bash_calcite`: Bash/curl examples with Calcite engine + - `bash_settings`: Bash examples for settings/configuration + +3. 
**Code Block Format**: Use **paired** fenced code blocks - each input block must be followed by its expected output block: + +```ppl +search source=accounts | where age > 25 | fields firstname, lastname +``` + +Expected output: + +```text ++-------------+------------+ +| firstname | lastname | +|-------------+------------| +| Amber | Duke | +| Hattie | Bond | ++-------------+------------+ +``` + +- **Input/Output Pairs**: Each input code fence must be immediately followed by an "Expected output:" section with an output code fence +- **Supported Input Languages**: `sql`, `ppl`, `bash`, `sh`, `bash ppl` +- **Supported Output Languages**: `text`, `console`, `output`, `json`, `yaml` + +4. **Ignoring Tests**: To skip specific code blocks from testing, add the `ignore` attribute: + +```ppl ignore +search source=accounts | head 5 +``` + +Expected output: + +```text +This output won't be tested +``` + +5. **Validation**: Markdown categories only accept `.md` files - mixing with `.rst` files will cause validation errors +6. **Testing**: Run `./gradlew doctest` to validate your markdown documentation + Currently, there is a `sample` folder under `docs` module to help you get started. ## 1.5 Future Plan diff --git a/docs/user/ppl/admin/connectors/prometheus_connector.md b/docs/user/ppl/admin/connectors/prometheus_connector.md new file mode 100644 index 00000000000..fab00fb21bb --- /dev/null +++ b/docs/user/ppl/admin/connectors/prometheus_connector.md @@ -0,0 +1,326 @@ +# Prometheus Connector + +## Introduction + +This page covers prometheus connector properties for dataSource configuration +and the nuances associated with the prometheus connector. +## Prometheus Connector Properties in DataSource Configuration + +Prometheus Connector Properties. +* `prometheus.uri` [Required]. + * This parameter provides the URI information to connect to a prometheus instance. +* `prometheus.auth.type` [Optional] + * This parameter provides the authentication type information. + * Prometheus connector currently supports `basicauth` and `awssigv4` authentication mechanisms. + * If prometheus.auth.type is basicauth, the following parameters are required: + * `prometheus.auth.username` and `prometheus.auth.password`. + * If prometheus.auth.type is awssigv4, the following parameters are required: + * `prometheus.auth.region`, `prometheus.auth.access_key` and `prometheus.auth.secret_key` + +## Example prometheus dataSource configuration with different authentications + +No Auth + +```bash +[{ + "name" : "my_prometheus", + "connector": "prometheus", + "properties" : { + "prometheus.uri" : "http://localhost:9090" + } +}] + +``` + +Basic Auth + +```bash +[{ + "name" : "my_prometheus", + "connector": "prometheus", + "properties" : { + "prometheus.uri" : "http://localhost:9090", + "prometheus.auth.type" : "basicauth", + "prometheus.auth.username" : "admin", + "prometheus.auth.password" : "admin" + } +}] + +``` + +AWSSigV4 Auth + +```bash +[{ + "name" : "my_prometheus", + "connector": "prometheus", + "properties" : { + "prometheus.uri" : "http://localhost:8080", + "prometheus.auth.type" : "awssigv4", + "prometheus.auth.region" : "us-east-1", + "prometheus.auth.access_key" : "{{accessKey}}", + "prometheus.auth.secret_key" : "{{secretKey}}" + } +}] + +``` + +## PPL Query support for prometheus connector + +### Metric as a Table + +Each connector has to abstract the underlying datasource constructs into a table as part of the interface contract with the PPL query engine. 
+Prometheus connector abstracts each metric as a table and the columns of this table are `@value`, `@timestamp`, `label1`, `label2`, and so on. +`@value` represents the metric measurement and `@timestamp` represents the timestamp at which the metric is collected. Labels are tags associated with the metric queried. +For example, `handler`, `code`, `instance` and `job` are the labels associated with the `prometheus_http_requests_total` metric. With this abstraction, we can query prometheus +data using PPL syntax similar to OpenSearch indices. +Sample Example + +```ppl +source = my_prometheus.prometheus_http_requests_total +``` + +Expected output: + +```text ++--------+-----------------------+--------------+------+------------+------------+ +| @value | @timestamp | handler | code | instance | job | +|--------+-----------------------+--------------+------+------------+------------| +| 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | +| 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | +| 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | ++--------+-----------------------+--------------+------+------------+------------+ +``` + +### Default time range and resolution + +Time range and resolution are required parameters for the Prometheus query APIs. These parameters are determined from the PPL commands in the following manner. +* Time range is determined through the filter clause on `@timestamp`. If there is no such filter clause, time range will be set to 1h with endtime set to now(). +* In case of stats, resolution is determined by the `span(@timestamp,15s)` expression. For normal select queries, resolution is auto-determined from the time range set. + +### Prometheus Connector Limitations + +* Only one aggregation is supported in the stats command. +* A span expression is compulsory in the stats command. +* AVG, MAX, MIN, SUM, COUNT are the only aggregations supported in the prometheus connector. +* The where clause only supports the EQUALS(=) operation on metric dimensions and comparison (>, <, >=, <=) operations on the @timestamp attribute. + +### Example queries + +1. Metric Selection Query + +```ppl +source = my_prometheus.prometheus_http_requests_total +``` + +Expected output: + +```text ++--------+-----------------------+--------------+------+------------+------------+ +| @value | @timestamp | handler | code | instance | job | +|--------+-----------------------+--------------+------+------------+------------| +| 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | +| 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | +| 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | ++--------+-----------------------+--------------+------+------------+------------+ +``` + +2. 
Metric Selecting Query with specific dimensions + +```ppl +source = my_prometheus.prometheus_http_requests_total +| where handler='/-/ready' and code='200' +``` + +Expected output: + +```text ++--------+-----------------------+------------+------+------------+------------+ +| @value | @timestamp | handler | code | instance | job | +|--------+-----------------------+------------+------+------------+------------| +| 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 2 | "2022-11-03 07:18:44" | "/-/ready" | 200 | 192.15.2.1 | prometheus | +| 9 | "2022-11-03 07:18:54" | "/-/ready" | 200 | 192.15.2.1 | prometheus | +| 11 | "2022-11-03 07:18:64" | "/-/ready" | 200 | 192.15.2.1 | prometheus | ++--------+-----------------------+------------+------+------------+------------+ +``` + +3. Average aggregation on a metric + +```ppl +source = my_prometheus.prometheus_http_requests_total +| stats avg(@value) by span(@timestamp,15s) +``` + +Expected output: + +```text ++------------+------------------------+ +| avg(@value)| span(@timestamp,15s) | +|------------+------------------------+ +| 5 | "2022-11-03 07:18:14" | +| 3 | "2022-11-03 07:18:24" | +| 7 | "2022-11-03 07:18:34" | +| 2 | "2022-11-03 07:18:44" | +| 9 | "2022-11-03 07:18:54" | +| 11 | "2022-11-03 07:18:64" | ++------------+------------------------+ +``` + +4. Average aggregation grouped by dimensions + +```ppl +source = my_prometheus.prometheus_http_requests_total +| stats avg(@value) by span(@timestamp,15s), handler, code +``` + +Expected output: + +```text ++------------+------------------------+--------------------------------+---------------+ +| avg(@value)| span(@timestamp,15s) | handler | code | +|------------+------------------------+--------------------------------+---------------+ +| 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | +| 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | +| 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | +| 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | +| 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | +| 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | ++------------+------------------------+--------------------------------+---------------+ +``` + +5. Count aggregation query + +```ppl +source = my_prometheus.prometheus_http_requests_total +| stats count() by span(@timestamp,15s), handler, code +``` + +Expected output: + +```text ++------------+------------------------+--------------------------------+---------------+ +| count() | span(@timestamp,15s) | handler | code | +|------------+------------------------+--------------------------------+---------------+ +| 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | +| 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | +| 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | +| 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | +| 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | +| 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | ++------------+------------------------+--------------------------------+---------------+ +``` + +## PromQL Support for prometheus Connector + +### `query_range` Table Function + +* Prometheus connector offers `query_range` table function. This table function can be used to query metrics in a specific time range using promQL. 
+* The function takes inputs similar to the parameters of the query range API described here: [Prometheus query_range API](https://prometheus.io/docs/prometheus/latest/querying/api/) +* Arguments can be passed either by name or by position. + - `source=my_prometheus.query_range('prometheus_http_requests_total', 1686694425, 1686700130, 14)` + - `source=my_prometheus.query_range(query='prometheus_http_requests_total', starttime=1686694425, endtime=1686700130, step=14)` + +Example + +```ppl +source=my_prometheus.query_range('prometheus_http_requests_total', 1686694425, 1686700130, 14) +``` + +Expected output: + +```text ++--------+-----------------------+--------------+------+------------+------------+ +| @value | @timestamp | handler | code | instance | job | +|--------+-----------------------+--------------+------+------------+------------| +| 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | +| 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | +| 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | +| 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | ++--------+-----------------------+--------------+------+------------+------------+ +``` + +## Prometheus Connector Table Functions + +### `query_exemplars` Table Function + +* This table function can be used to fetch exemplars of a query in a specific time range. +* The function takes inputs similar to the parameters of the query exemplars API described here: [Prometheus query_exemplars API](https://prometheus.io/docs/prometheus/latest/querying/api/) +* Arguments can be passed either by name or by position. + - `source=my_prometheus.query_exemplars('prometheus_http_requests_total', 1686694425, 1686700130)` + - `source=my_prometheus.query_exemplars(query='prometheus_http_requests_total', starttime=1686694425, endtime=1686700130)` + +Example + +```ppl +source=my_prometheus.query_exemplars('prometheus_http_requests_total', 1686694425, 1686700130) +``` + +Expected output: + +```text + "schema": [ + { + "name": "seriesLabels", + "type": "struct" + }, + { + "name": "exemplars", + "type": "array" + } + ], + "datarows": [ + [ + { + "instance": "localhost:8090", + "__name__": "test_exemplar_metric_total", + "service": "bar", + "job": "prometheus" + }, + [ + { + "labels": { + "traceID": "EpTxMJ40fUus7aGY" + }, + "timestamp": "2020-09-14 15:22:25.479", + "value": 6.0 + } + ] + ], + [ + { + "instance": "localhost:8090", + "__name__": "test_exemplar_metric_total", + "service": "foo", + "job": "prometheus" + }, + [ + { + "labels": { + "traceID": "Olp9XHlq763ccsfa" + }, + "timestamp": "2020-09-14 15:22:35.479", + "value": 19.0 + }, + { + "labels": { + "traceID": "hCtjygkIHwAN9vs4" + }, + "timestamp": "2020-09-14 15:22:45.489", + "value": 20.0 + } + ] + ] + ] +``` + \ No newline at end of file diff --git a/docs/user/ppl/admin/connectors/prometheus_connector.rst b/docs/user/ppl/admin/connectors/prometheus_connector.rst deleted file mode 100644 index 812df4f8943..00000000000 --- a/docs/user/ppl/admin/connectors/prometheus_connector.rst +++ /dev/null @@ -1,279 +0,0 @@ -.. highlight:: sh - -==================== -Prometheus Connector -==================== - -.. rubric:: Table of contents - -.. 
contents:: - :local: - :depth: 1 - - -Introduction -============ - -This page covers prometheus connector properties for dataSource configuration -and the nuances associated with prometheus connector. - - -Prometheus Connector Properties in DataSource Configuration -======================================================== -Prometheus Connector Properties. - -* ``prometheus.uri`` [Required]. - * This parameters provides the URI information to connect to a prometheus instance. -* ``prometheus.auth.type`` [Optional] - * This parameters provides the authentication type information. - * Prometheus connector currently supports ``basicauth`` and ``awssigv4`` authentication mechanisms. - * If prometheus.auth.type is basicauth, following are required parameters. - * ``prometheus.auth.username`` and ``prometheus.auth.password``. - * If prometheus.auth.type is awssigv4, following are required parameters. - * ``prometheus.auth.region``, ``prometheus.auth.access_key`` and ``prometheus.auth.secret_key`` - -Example prometheus dataSource configuration with different authentications -======================================================================= - -No Auth :: - - [{ - "name" : "my_prometheus", - "connector": "prometheus", - "properties" : { - "prometheus.uri" : "http://localhost:9090" - } - }] - -Basic Auth :: - - [{ - "name" : "my_prometheus", - "connector": "prometheus", - "properties" : { - "prometheus.uri" : "http://localhost:9090", - "prometheus.auth.type" : "basicauth", - "prometheus.auth.username" : "admin", - "prometheus.auth.password" : "admin" - } - }] - -AWSSigV4 Auth:: - - [{ - "name" : "my_prometheus", - "connector": "prometheus", - "properties" : { - "prometheus.uri" : "http://localhost:8080", - "prometheus.auth.type" : "awssigv4", - "prometheus.auth.region" : "us-east-1", - "prometheus.auth.access_key" : "{{accessKey}}" - "prometheus.auth.secret_key" : "{{secretKey}}" - } - }] - -PPL Query support for prometheus connector -========================================== - -Metric as a Table ---------------------------- -Each connector has to abstract the underlying datasource constructs into a table as part of the interface contract with the PPL query engine. -Prometheus connector abstracts each metric as a table and the columns of this table are ``@value``, ``@timestamp``, ``label1``, ``label2``---. -``@value`` represents metric measurement and ``@timestamp`` represents the timestamp at which the metric is collected. labels are tags associated with metric queried. -For eg: ``handler``, ``code``, ``instance``, ``code`` are the labels associated with ``prometheus_http_requests_total`` metric. With this abstraction, we can query prometheus -data using PPL syntax similar to opensearch indices. 
- -Sample Example:: - - > source = my_prometheus.prometheus_http_requests_total; - - +--------+-----------------------+--------------+------+------------+------------+ - | @value | @timestamp | handler | code | instance | job | - |--------+-----------------------+--------------+------+------------+------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | - +--------+-----------------------+--------------+------+------------+------------+ - - - -Default time range and resolution ---------------------------------- -Since time range and resolution are required parameters for query apis and these parameters are determined in the following manner from the PPL commands. -* Time range is determined through filter clause on ``@timestamp``. If there is no such filter clause, time range will be set to 1h with endtime set to now(). -* In case of stats, resolution is determined by ``span(@timestamp,15s)`` expression. For normal select queries, resolution is auto determined from the time range set. - -Prometheus Connector Limitations --------------------------------- -* Only one aggregation is supported in stats command. -* Span Expression is compulsory in stats command. -* AVG, MAX, MIN, SUM, COUNT are the only aggregations supported in prometheus connector. -* Where clause only supports EQUALS(=) operation on metric dimensions and Comparative(> , < , >= , <=) Operations on @timestamp attribute. - -Example queries ---------------- - -1. Metric Selection Query:: - - > source = my_prometheus.prometheus_http_requests_total - +--------+-----------------------+--------------+------+------------+------------+ - | @value | @timestamp | handler | code | instance | job | - |--------+-----------------------+--------------+------+------------+------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | - +--------+-----------------------+--------------+------+------------+------------+ - -2. 
Metric Selecting Query with specific dimensions:: - - > source = my_prometheus.prometheus_http_requests_total | where handler='/-/ready' and code='200' - +--------+-----------------------+------------+------+------------+------------+ - | @value | @timestamp | handler | code | instance | job | - |--------+-----------------------+------------+------+------------+------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 200 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/ready" | 200 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" | "/-/ready" | 200 | 192.15.2.1 | prometheus | - +--------+-----------------------+------------+------+------------+------------+ - -3. Average aggregation on a metric:: - - > source = my_prometheus.prometheus_http_requests_total | stats avg(@value) by span(@timestamp,15s) - +------------+------------------------+ - | avg(@value)| span(@timestamp,15s) | - |------------+------------------------+ - | 5 | "2022-11-03 07:18:14" | - | 3 | "2022-11-03 07:18:24" | - | 7 | "2022-11-03 07:18:34" | - | 2 | "2022-11-03 07:18:44" | - | 9 | "2022-11-03 07:18:54" | - | 11 | "2022-11-03 07:18:64" | - +------------+------------------------+ - -4. Average aggregation grouped by dimensions:: - - > source = my_prometheus.prometheus_http_requests_total | stats avg(@value) by span(@timestamp,15s), handler, code - +------------+------------------------+--------------------------------+---------------+ - | avg(@value)| span(@timestamp,15s) | handler | code | - |------------+------------------------+--------------------------------+---------------+ - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | - | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | - +------------+------------------------+--------------------------------+---------------+ - -5. Count aggregation query:: - - > source = my_prometheus.prometheus_http_requests_total | stats count() by span(@timestamp,15s), handler, code - +------------+------------------------+--------------------------------+---------------+ - | count() | span(@timestamp,15s) | handler | code | - |------------+------------------------+--------------------------------+---------------+ - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | - | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | - +------------+------------------------+--------------------------------+---------------+ - -PromQL Support for prometheus Connector -========================================== - -`query_range` Table Function ----------------------------- -* Prometheus connector offers `query_range` table function. This table function can be used to query metrics in a specific time range using promQL. 
-* The function takes inputs similar to parameters mentioned for query range api mentioned here: https://prometheus.io/docs/prometheus/latest/querying/api/ -* Arguments should be either passed by name or positionArguments should be either passed by name or position. - - `source=my_prometheus.query_range('prometheus_http_requests_total', 1686694425, 1686700130, 14)` - - `source=my_prometheus.query_range(query='prometheus_http_requests_total', starttime=1686694425, endtime=1686700130, step=14)` -Example:: - - > source=my_prometheus.query_range('prometheus_http_requests_total', 1686694425, 1686700130, 14) - +--------+-----------------------+--------------+------+------------+------------+ - | @value | @timestamp | handler | code | instance | job | - |--------+-----------------------+--------------+------+------------+------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | - +--------+-----------------------+--------------+------+------------+------------+ - - -Prometheus Connector Table Functions -========================================== - -`query_exemplars` Table Function ----------------------------- -* This table function can be used to fetch exemplars of a query in a specific time range. -* The function takes inputs similar to parameters mentioned for query exemplars api mentioned here: https://prometheus.io/docs/prometheus/latest/querying/api/ -* Arguments should be either passed by name or positionArguments should be either passed by name or position. - - `source=my_prometheus.query_exemplars('prometheus_http_requests_total', 1686694425, 1686700130)` - - `source=my_prometheus.query_exemplars(query='prometheus_http_requests_total', starttime=1686694425, endtime=1686700130)` -Example:: - - > source=my_prometheus.query_exemplars('prometheus_http_requests_total', 1686694425, 1686700130) - "schema": [ - { - "name": "seriesLabels", - "type": "struct" - }, - { - "name": "exemplars", - "type": "array" - } - ], - "datarows": [ - [ - { - "instance": "localhost:8090", - "__name__": "test_exemplar_metric_total", - "service": "bar", - "job": "prometheus" - }, - [ - { - "labels": { - "traceID": "EpTxMJ40fUus7aGY" - }, - "timestamp": "2020-09-14 15:22:25.479", - "value": 6.0 - } - ] - ], - [ - { - "instance": "localhost:8090", - "__name__": "test_exemplar_metric_total", - "service": "foo", - "job": "prometheus" - }, - [ - { - "labels": { - "traceID": "Olp9XHlq763ccsfa" - }, - "timestamp": "2020-09-14 15:22:35.479", - "value": 19.0 - }, - { - "labels": { - "traceID": "hCtjygkIHwAN9vs4" - }, - "timestamp": "2020-09-14 15:22:45.489", - "value": 20.0 - } - ] - ] - ] diff --git a/docs/user/ppl/admin/connectors/s3glue_connector.md b/docs/user/ppl/admin/connectors/s3glue_connector.md new file mode 100644 index 00000000000..4b2d75f1042 --- /dev/null +++ b/docs/user/ppl/admin/connectors/s3glue_connector.md @@ -0,0 +1,77 @@ +# S3Glue Connector + +## Introduction + +s3Glue connector provides a way to query s3 files using glue as metadata store and spark as execution engine. +This page covers s3Glue datasource configuration and also how to query an s3Glue datasource. 
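+Once configured, an s3Glue datasource is registered through the datasource management API, as described in [Datasource Settings](../datasources.md). Below is a minimal registration sketch, assuming a security-enabled domain; the required `properties` are covered in the sections below: + +```bash +POST https://localhost:9200/_plugins/_query/_datasources +content-type: application/json +Authorization: Basic {{username}} {{password}} + +{ + "name" : "my_glue", + "connector": "s3glue", + "properties" : { ... } +} +``` 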
+## Required resources for s3 Glue Connector + +* `EMRServerless Spark Execution Engine Config Setting`: Since we execute s3Glue queries on top of the spark execution engine, this configuration is required. + + More details: [ExecutionEngine Config](../../../interfaces/asyncqueryinterface.rst#id2) +* `S3`: This is where the data lies. +* `Glue` Metadata store: Glue takes care of table metadata. +* `Opensearch IndexStore`: Index for s3 data lies in opensearch and also acts as temporary buffer for query results. + +We currently only support emr-serverless as spark execution engine and Glue as metadata store. We will add more support in the future. +Glue Connector Properties. +* `resultIndex` is a new parameter specific to glue connector. Stores the results of queries executed on the data source. If not provided, it defaults to `.query_execution_result`. +* `glue.auth.type` [Required] + * This parameter provides the authentication type information required for the execution engine to connect to glue. + * S3 Glue connector currently only supports `iam_role` authentication and the below parameter is required. + * `glue.auth.role_arn` +* `glue.indexstore.opensearch.*` [Required] + * This parameter provides the Opensearch domain host information for the glue connector. This opensearch instance is used for writing index data back and also acts as a temporary buffer for query results. + * `glue.indexstore.opensearch.uri` [Required] + * `glue.indexstore.opensearch.auth` [Required] + * Accepted values include ["noauth", "basicauth", "awssigv4"] + * Basic Auth requires `glue.indexstore.opensearch.auth.username` and `glue.indexstore.opensearch.auth.password` + * AWSSigV4 Auth requires `glue.indexstore.opensearch.auth.region` and `glue.auth.role_arn` + * `glue.indexstore.opensearch.region` [Required for awssigv4 auth] +* `glue.iceberg.enabled` determines whether to enable Iceberg for the session. Default value is `"false"` if not specified. +* `glue.lakeformation.enabled` determines whether to enable Lake Formation for queries when Iceberg is also enabled. If Iceberg is not enabled, then this property has no effect. Default value is `"false"` if not specified. +* `glue.lakeformation.session_tag` specifies what session tag to use when assuming the data source role. This property is required when both Iceberg and Lake Formation are enabled. 
+ +## Sample Glue dataSource configuration + +Glue datasource configuration + +```bash +[{ + "name" : "my_glue", + "connector": "s3glue", + "properties" : { + "glue.auth.type": "iam_role", + "glue.auth.role_arn": "role_arn", + "glue.indexstore.opensearch.uri": "http://localhost:9200", + "glue.indexstore.opensearch.auth" :"basicauth", + "glue.indexstore.opensearch.auth.username" :"username", + "glue.indexstore.opensearch.auth.password" :"password" + }, + "resultIndex": "query_execution_result" +}] + +[{ + "name" : "my_glue", + "connector": "s3glue", + "properties" : { + "glue.auth.type": "iam_role", + "glue.auth.role_arn": "role_arn", + "glue.indexstore.opensearch.uri": "http://adsasdf.amazonopensearch.com:9200", + "glue.indexstore.opensearch.auth" :"awssigv4", + "glue.indexstore.opensearch.auth.region" :"us-east-1" + }, + "resultIndex": "query_execution_result" +}] + +``` + +## Sample s3Glue datasource query APIs + +Sample Queries +* Select Query: `select * from mys3.default.http_logs limit 1` +* Create Covering Index Query: `create index clientip_year on my_glue.default.http_logs (clientip, year) WITH (auto_refresh=true)` +* Create Skipping Index: `create skipping index on mys3.default.http_logs (status VALUE_SET)` + +These queries work only on top of async queries. Documentation: [Async Query APIs](../../../interfaces/asyncqueryinterface.rst) +Documentation for Index Queries: https://github.com/opensearch-project/opensearch-spark/blob/main/docs/index.md \ No newline at end of file diff --git a/docs/user/ppl/admin/connectors/s3glue_connector.rst b/docs/user/ppl/admin/connectors/s3glue_connector.rst deleted file mode 100644 index 48f19a9d1e5..00000000000 --- a/docs/user/ppl/admin/connectors/s3glue_connector.rst +++ /dev/null @@ -1,92 +0,0 @@ -.. highlight:: sh - -==================== -S3Glue Connector -==================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -Introduction -============ - -s3Glue connector provides a way to query s3 files using glue as metadata store and spark as execution engine. -This page covers s3Glue datasource configuration and also how to query and s3Glue datasource. - -Required resources for s3 Glue Connector -======================================== -* ``EMRServerless Spark Execution Engine Config Setting``: Since we execute s3Glue queries on top of spark execution engine, we require this configuration. - More details: `ExecutionEngine Config <../../../interfaces/asyncqueryinterface.rst#id2>`_ -* ``S3``: This is where the data lies. -* ``Glue`` Metadata store: Glue takes care of table metadata. -* ``Opensearch IndexStore``: Index for s3 data lies in opensearch and also acts as temporary buffer for query results. - -We currently only support emr-serverless as spark execution engine and Glue as metadata store. we will add more support in future. - -Glue Connector Properties. - -* ``resultIndex`` is a new parameter specific to glue connector. Stores the results of queries executed on the data source. If unavailable, it defaults to .query_execution_result. -* ``glue.auth.type`` [Required] - * This parameters provides the authentication type information required for execution engine to connect to glue. - * S3 Glue connector currently only supports ``iam_role`` authentication and the below parameters is required. - * ``glue.auth.role_arn`` -* ``glue.indexstore.opensearch.*`` [Required] - * This parameters provides the Opensearch domain host information for glue connector. 
This opensearch instance is used for writing index data back and also - * ``glue.indexstore.opensearch.uri`` [Required] - * ``glue.indexstore.opensearch.auth`` [Required] - * Accepted values include ["noauth", "basicauth", "awssigv4"] - * Basic Auth required ``glue.indexstore.opensearch.auth.username`` and ``glue.indexstore.opensearch.auth.password`` - * AWSSigV4 Auth requires ``glue.indexstore.opensearch.auth.region`` and ``glue.auth.role_arn`` - * ``glue.indexstore.opensearch.region`` [Required for awssigv4 auth] -* ``glue.iceberg.enabled`` determines whether to enable Iceberg for the session. Default value is ``"false"`` if not specified. -* ``glue.lakeformation.enabled`` determines whether to enable Lake Formation for queries when Iceberg is also enabled. If Iceberg is not enabled, then this property has no effect. Default value is ``"false"`` if not specified. -* ``glue.lakeformation.session_tag`` what session tag to use when assuming the data source role. This property is required when both Iceberg and Lake Formation are enabled. - -Sample Glue dataSource configuration -======================================== - -Glue datasource configuration:: - - [{ - "name" : "my_glue", - "connector": "s3glue", - "properties" : { - "glue.auth.type": "iam_role", - "glue.auth.role_arn": "role_arn", - "glue.indexstore.opensearch.uri": "http://localhost:9200", - "glue.indexstore.opensearch.auth" :"basicauth", - "glue.indexstore.opensearch.auth.username" :"username", - "glue.indexstore.opensearch.auth.password" :"password" - }, - "resultIndex": "query_execution_result" - }] - - [{ - "name" : "my_glue", - "connector": "s3glue", - "properties" : { - "glue.auth.type": "iam_role", - "glue.auth.role_arn": "role_arn", - "glue.indexstore.opensearch.uri": "http://adsasdf.amazonopensearch.com:9200", - "glue.indexstore.opensearch.auth" :"awssigv4", - "glue.indexstore.opensearch.auth.region" :"us-east-1" - }, - "resultIndex": "query_execution_result" - }] - -Sample s3Glue datasource queries APIS -===================================== - -Sample Queries - -* Select Query : ``select * from mys3.default.http_logs limit 1"`` -* Create Covering Index Query: ``create index clientip_year on my_glue.default.http_logs (clientip, year) WITH (auto_refresh=true)`` -* Create Skipping Index: ``create skipping index on mys3.default.http_logs (status VALUE_SET)`` - -These queries would work only top of async queries. Documentation: `Async Query APIs <../../../interfaces/asyncqueryinterface.rst>`_ - -Documentation for Index Queries: https://github.com/opensearch-project/opensearch-spark/blob/main/docs/index.md diff --git a/docs/user/ppl/admin/connectors/security_lake_connector.md b/docs/user/ppl/admin/connectors/security_lake_connector.md new file mode 100644 index 00000000000..d33f1f49702 --- /dev/null +++ b/docs/user/ppl/admin/connectors/security_lake_connector.md @@ -0,0 +1,63 @@ +# Security Lake Connector + +## Introduction + +Security Lake connector provides a way to query Security Lake tables. +## Required resources for Security Lake Connector + +* `EMRServerless Spark Execution Engine Config Setting`: Since we execute s3Glue queries on top of spark execution engine, we require this configuration. + + More details: [ExecutionEngine Config](../../../interfaces/asyncqueryinterface.rst#id2) +* `S3`: This is where the data lies. +* `Glue`: Metadata store: Glue takes care of table metadata. 
+* `Lake Formation`: AWS service that performs authorization on Security Lake tables +* `Security Lake`: AWS service that orchestrates creation of S3 files, Glue tables, and Lake Formation permissions. +* `Opensearch IndexStore`: Index for s3 data lies in opensearch and also acts as temporary buffer for query results. + +We currently only support emr-serverless as spark execution engine and Glue as metadata store. We will add more support in the future. +Glue Connector Properties. +* `resultIndex` is a new parameter specific to glue connector. Stores the results of queries executed on the data source. If not provided, it defaults to `.query_execution_result`. +* `glue.auth.type` [Required] + * This parameter provides the authentication type information required for the execution engine to connect to glue. + * S3 Glue connector currently only supports `iam_role` authentication and the below parameter is required. + * `glue.auth.role_arn` +* `glue.indexstore.opensearch.*` [Required] + * This parameter provides the Opensearch domain host information for the glue connector. This opensearch instance is used for writing index data back and also acts as a temporary buffer for query results. + * `glue.indexstore.opensearch.uri` [Required] + * `glue.indexstore.opensearch.auth` [Required] + * Accepted values include ["noauth", "basicauth", "awssigv4"] + * Basic Auth requires `glue.indexstore.opensearch.auth.username` and `glue.indexstore.opensearch.auth.password` + * AWSSigV4 Auth requires `glue.indexstore.opensearch.auth.region` and `glue.auth.role_arn` + * `glue.indexstore.opensearch.region` [Required for awssigv4 auth] +* `glue.lakeformation.session_tag` [Required] + * What session tag to use when assuming the data source role. + +## Sample Glue dataSource configuration + +Glue datasource configuration + +```bash +[{ + "name" : "my_sl", + "connector": "security_lake", + "properties" : { + "glue.auth.type": "iam_role", + "glue.auth.role_arn": "role_arn", + "glue.indexstore.opensearch.uri": "http://adsasdf.amazonopensearch.com:9200", + "glue.indexstore.opensearch.auth" :"awssigv4", + "glue.indexstore.opensearch.auth.region" :"us-east-1", + "glue.lakeformation.session_tag": "session_tag" + }, + "resultIndex": "query_execution_result" +}] + +``` + +## Sample Security Lake datasource query APIs + +Sample Queries +* Select Query: `select * from mysl.amazon_security_lake_glue_db_eu_west_1.amazon_security_lake_table_eu_west_1_vpc_flow_2_0 limit 1` +* Create Covering Index Query: `create index srcip_time on mysl.amazon_security_lake_glue_db_eu_west_1.amazon_security_lake_table_eu_west_1_vpc_flow_2_0 (src_endpoint.ip, time) WITH (auto_refresh=true)` + +These queries work only on top of async queries. Documentation: [Async Query APIs](../../../interfaces/asyncqueryinterface.rst) +Documentation for Index Queries: https://github.com/opensearch-project/opensearch-spark/blob/main/docs/index.md \ No newline at end of file diff --git a/docs/user/ppl/admin/connectors/security_lake_connector.rst b/docs/user/ppl/admin/connectors/security_lake_connector.rst deleted file mode 100644 index 6afddca1319..00000000000 --- a/docs/user/ppl/admin/connectors/security_lake_connector.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. highlight:: sh - -==================== -Security Lake Connector -==================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -Introduction -============ - -Security Lake connector provides a way to query Security Lake tables. 
- -Required resources for Security Lake Connector -======================================== -* ``EMRServerless Spark Execution Engine Config Setting``: Since we execute s3Glue queries on top of spark execution engine, we require this configuration. - More details: `ExecutionEngine Config <../../../interfaces/asyncqueryinterface.rst#id2>`_ -* ``S3``: This is where the data lies. -* ``Glue``: Metadata store: Glue takes care of table metadata. -* ``Lake Formation``: AWS service that performs authorization on Security Lake tables -* ``Security Lake``: AWS service that orchestrates creation of S3 files, Glue tables, and Lake Formation permissions. -* ``Opensearch IndexStore``: Index for s3 data lies in opensearch and also acts as temporary buffer for query results. - -We currently only support emr-serverless as spark execution engine and Glue as metadata store. we will add more support in future. - -Glue Connector Properties. - -* ``resultIndex`` is a new parameter specific to glue connector. Stores the results of queries executed on the data source. If unavailable, it defaults to .query_execution_result. -* ``glue.auth.type`` [Required] - * This parameters provides the authentication type information required for execution engine to connect to glue. - * S3 Glue connector currently only supports ``iam_role`` authentication and the below parameters is required. - * ``glue.auth.role_arn`` -* ``glue.indexstore.opensearch.*`` [Required] - * This parameters provides the Opensearch domain host information for glue connector. This opensearch instance is used for writing index data back and also - * ``glue.indexstore.opensearch.uri`` [Required] - * ``glue.indexstore.opensearch.auth`` [Required] - * Accepted values include ["noauth", "basicauth", "awssigv4"] - * Basic Auth required ``glue.indexstore.opensearch.auth.username`` and ``glue.indexstore.opensearch.auth.password`` - * AWSSigV4 Auth requires ``glue.indexstore.opensearch.auth.region`` and ``glue.auth.role_arn`` - * ``glue.indexstore.opensearch.region`` [Required for awssigv4 auth] -* ``glue.lakeformation.session_tag`` [Required] - * What session tag to use when assuming the data source role. - -Sample Glue dataSource configuration -======================================== - -Glue datasource configuration:: - - [{ - "name" : "my_sl", - "connector": "security_lake", - "properties" : { - "glue.auth.type": "iam_role", - "glue.auth.role_arn": "role_arn", - "glue.indexstore.opensearch.uri": "http://adsasdf.amazonopensearch.com:9200", - "glue.indexstore.opensearch.auth" :"awssigv4", - "glue.indexstore.opensearch.auth.region" :"us-east-1", - "glue.lakeformation.session_tag": "sesson_tag" - }, - "resultIndex": "query_execution_result" - }] - -Sample Security Lake datasource queries APIS -===================================== - -Sample Queries - -* Select Query : ``select * from mysl.amazon_security_lake_glue_db_eu_west_1.amazon_security_lake_table_eu_west_1_vpc_flow_2_0 limit 1`` -* Create Covering Index Query: ``create index srcip_time on mysl.amazon_security_lake_glue_db_eu_west_1.amazon_security_lake_table_eu_west_1_vpc_flow_2_0 (src_endpoint.ip, time) WITH (auto_refresh=true)`` - -These queries would work only top of async queries. 
Documentation: `Async Query APIs <../../../interfaces/asyncqueryinterface.rst>`_ - -Documentation for Index Queries: https://github.com/opensearch-project/opensearch-spark/blob/main/docs/index.md diff --git a/docs/user/ppl/admin/cross_cluster_search.md b/docs/user/ppl/admin/cross_cluster_search.md new file mode 100644 index 00000000000..4acdd41e354 --- /dev/null +++ b/docs/user/ppl/admin/cross_cluster_search.md @@ -0,0 +1,89 @@ +# Cross-Cluster Search + +## Introduction + +Cross-cluster search lets any node in a cluster execute search requests against other clusters. +It makes searching easy across all connected clusters, allowing users to use multiple smaller clusters instead of a single large one. +## Configuration + +On the local cluster, add the remote cluster name and the IP address with port 9300 for each seed node. + +```bash +PUT _cluster/settings +{ + "persistent": { + "cluster.remote": { + "<remote-cluster-name>": { + "seeds": ["<ip-address>:9300"] + } + } + } +} +``` + +## Using Cross-Cluster Search in PPL + +Perform cross-cluster search by using "\<cluster-name\>:\<index-name\>" as the index identifier. +Example PPL query + +```ppl +source=my_remote_cluster:accounts +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | +|----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| +| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | +| 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | +| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | +| 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | ++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +``` + +## Limitations + +Since OpenSearch does not support cross-cluster index metadata retrieval, field mapping of a remote cluster index is not available to the local cluster. +([[Feature] Cross-cluster field mappings query #6573](https://github.com/opensearch-project/OpenSearch/issues/6573)) +Therefore, the query engine requires that for any remote cluster index that the users need to search, +the local cluster keep a field mapping system index with the same index name. +This can be done by creating an index on the local cluster with the same name and schema as the remote cluster index. +## Authentication and Permission + +1. The security plugin authenticates the user on the local cluster. +2. The security plugin fetches the user’s backend roles on the local cluster. +3. The call, including the authenticated user, is forwarded to the remote cluster. +4. The user’s permissions are evaluated on the remote cluster. + +Check [Cross-cluster search access control](https://opensearch.org/docs/latest/security/access-control/cross-cluster-search/) for more details. +Example: Create the ppl_role for test_user on local cluster and the ccs_role for test_user on remote cluster. Then test_user could use PPL to query `ppl-security-demo` index on remote cluster. +1. 
On the local cluster, refer to [Security Settings](security.md) to create role and user for PPL plugin and index access permission. +2. On the remote cluster, create a new role and grant permission to access index. Create a user with the same name and credentials as the local cluster, and map the user to this role + +```bash +PUT _plugins/_security/api/roles/ccs_role +{ + "index_permissions":[ + { + "index_patterns":["ppl-security-demo"], + "allowed_actions":[ + "indices:admin/shards/search_shards", + "indices:data/read/search" + ] + } + ] +} +``` + +```bash +PUT _plugins/_security/api/rolesmapping/ccs_role +{ + "backend_roles" : [], + "hosts" : [], + "users" : ["test_user"] +} +``` + \ No newline at end of file diff --git a/docs/user/ppl/admin/cross_cluster_search.rst b/docs/user/ppl/admin/cross_cluster_search.rst deleted file mode 100644 index a94a0dce67e..00000000000 --- a/docs/user/ppl/admin/cross_cluster_search.rst +++ /dev/null @@ -1,96 +0,0 @@ -.. highlight:: sh - -==================== -Cross-Cluster Search -==================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -Introduction -============ -Cross-cluster search lets any node in a cluster execute search requests against other clusters. -It makes searching easy across all connected clusters, allowing users to use multiple smaller clusters instead of a single large one. - - -Configuration -============= -On the local cluster, add the remote cluster name and the IP address with port 9300 for each seed node. :: - - PUT _cluster/settings - { - "persistent": { - "cluster.remote": { - "": { - "seeds": [":9300"] - } - } - } - } - - -Using Cross-Cluster Search in PPL -================================= -Perform cross-cluster search by using ":" as the index identifier. - -Example PPL query:: - - os> source=my_remote_cluster:accounts; - fetched rows / total rows = 4/4 - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ - - -Limitations -=========== -Since OpenSearch does not support cross cluster index metadata retrieval, field mapping of a remote cluster index is not available to the local cluster. -(`[Feature] Cross cluster field mappings query #6573 `_) -Therefore, the query engine requires that for any remote cluster index that the users need to search, -the local cluster keep a field mapping system index with the same index name. -This can be done by creating an index on the local cluster with the same name and schema as the remote cluster index. - - -Authentication and Permission -============================= - -1. The security plugin authenticates the user on the local cluster. -2. 
The security plugin fetches the user’s backend roles on the local cluster.
-3. The call, including the authenticated user, is forwarded to the remote cluster.
-4. The user’s permissions are evaluated on the remote cluster.
-
-Check `Cross-cluster search access control `_ for more details.
-
-Example: Create the ppl_role for test_user on local cluster and the ccs_role for test_user on remote cluster. Then test_user could use PPL to query ``ppl-security-demo`` index on remote cluster.
-
-1. On the local cluster, refer to `Security Settings `_ to create role and user for PPL plugin and index access permission.
-
-2. On the remote cluster, create a new role and grant permission to access index. Create a user with the same name and credentials as the local cluster, and map the user to this role::
-
-   PUT _plugins/_security/api/roles/ccs_role
-   {
-    "index_permissions":[
-     {
-      "index_patterns":["ppl-security-demo"],
-      "allowed_actions":[
-       "indices:admin/shards/search_shards",
-       "indices:data/read/search"
-      ]
-     }
-    ]
-   }
-
-   PUT _plugins/_security/api/rolesmapping/ccs_role
-   {
-    "backend_roles" : [],
-    "hosts" : [],
-    "users" : ["test_user"]
-   }
diff --git a/docs/user/ppl/admin/datasources.md b/docs/user/ppl/admin/datasources.md
new file mode 100644
index 00000000000..5d89b0eda94
--- /dev/null
+++ b/docs/user/ppl/admin/datasources.md
@@ -0,0 +1,304 @@
+# Datasource Settings
+
+## Introduction
+
+The concept of `datasource` is introduced to support the federation of the SQL/PPL query engine to multiple data stores.
+This helps PPL users leverage data from multiple data stores and derive correlations and insights.
+A datasource definition provides the information needed to connect to a data store and also gives it a name to refer to in PPL commands.
+Refer to the sections below for quick setup.
+* [Datasource configuration APIs](#datasource-configuration-apis)
+* [Master Key config for encrypting credential information](#master-key-config-for-encrypting-credential-information)
+
+## Definitions of datasource and connector
+
+* Connector is a component that adapts the query engine to a datastore. For example, the Prometheus connector would adapt and help execute the queries to run on a Prometheus datastore. The connector name is enough in the datasource definition JSON.
+* Datasource is a construct that defines how to connect to a data store and which connector the query engine should adapt.
+
+Example Prometheus Datasource Definition
+
+```bash
+{
+  "name" : "my_prometheus",
+  "connector": "prometheus",
+  "properties" : {
+      "prometheus.uri" : "http://localhost:8080",
+      "prometheus.auth.type" : "basicauth",
+      "prometheus.auth.username" : "admin",
+      "prometheus.auth.password" : "admin"
+  },
+  "allowedRoles" : ["prometheus_access"],
+  "status" : "ACTIVE|DISABLED"
+}
+```
+
+Datasource configuration restrictions:
+* `name`, `connector`, `properties` are required fields in the datasource configuration.
+* In case of secure domains, `allowedRoles` can be used to specify the opensearch roles allowed to access the datasource via PPL/SQL.
+* If `allowedRoles` are not specified for a datasource, only users with `all_access` could access the datasource in case of secure domains.
+* In case of security-disabled domains, authorization is disabled.
+* All datasource names should be unique and match the regex `[@*A-Za-z]+?[*a-zA-Z_\-0-9]*`.
+* Allowed connectors:
+ * `prometheus` [More details: [Prometheus Connector](connectors/prometheus_connector.md)]
+* All the allowed config parameters in `properties` are defined in the individual connector pages mentioned above.
+* From version 2.13, we have introduced a new optional field `status` which can be used to enable and disable a datasource. When a datasource is disabled, it blocks new queries, resulting in 400 errors for any attempts made on it. By default, when a datasource is created, its status is ACTIVE.
+
+## Datasource configuration APIs
+
+Datasource configuration can be managed using the REST APIs below. All the examples below are for OpenSearch domains with the security plugin enabled.
+You can omit the authorization and other security details in case of security-disabled domains.
+* Datasource Creation POST API ("_plugins/_query/_datasources")
+
+```bash
+POST https://localhost:9200/_plugins/_query/_datasources
+content-type: application/json
+Authorization: Basic {{username}} {{password}}
+
+{
+  "name" : "my_prometheus",
+  "connector": "prometheus",
+  "properties" : {
+      "prometheus.uri" : "http://localhost:8080",
+      "prometheus.auth.type" : "basicauth",
+      "prometheus.auth.username" : "admin",
+      "prometheus.auth.password" : "admin"
+  },
+  "allowedRoles" : ["prometheus_access"]
+}
+
+```
+
+* Datasource modification PUT API ("_plugins/_query/_datasources")
+
+```bash
+PUT https://localhost:9200/_plugins/_query/_datasources
+content-type: application/json
+Authorization: Basic {{username}} {{password}}
+
+{
+  "name" : "my_prometheus",
+  "connector": "prometheus",
+  "properties" : {
+      "prometheus.uri" : "http://localhost:8080",
+      "prometheus.auth.type" : "basicauth",
+      "prometheus.auth.username" : "admin",
+      "prometheus.auth.password" : "admin"
+  },
+  "allowedRoles" : ["prometheus_access"]
+}
+
+```
+
+* Datasource modification PATCH API ("_plugins/_query/_datasources")
+
+```bash
+PATCH https://localhost:9200/_plugins/_query/_datasources
+content-type: application/json
+Authorization: Basic {{username}} {{password}}
+
+{
+  "name" : "my_prometheus",
+  "allowedRoles" : ["all_access"]
+}
+```
+**Name is required and must exist. Connector cannot be modified and will be ignored.**
+
+
+* Datasource Read GET API ("_plugins/_query/_datasources/{{dataSourceName}}")
+
+```bash
+GET https://localhost:9200/_plugins/_query/_datasources/my_prometheus
+content-type: application/json
+Authorization: Basic {{username}} {{password}}
+```
+**Authentication Information won't be vended out in GET API's response.**
+
+* Datasource Deletion DELETE API ("_plugins/_query/_datasources/{{dataSourceName}}")
+
+```bash
+DELETE https://localhost:9200/_plugins/_query/_datasources/my_prometheus
+content-type: application/json
+Authorization: Basic {{username}} {{password}}
+
+```
+
+## Authorization of datasource configuration APIs
+
+Each datasource configuration management API is controlled by one of the following actions, respectively.
+* cluster:admin/opensearch/datasources/create [Create POST API]
+* cluster:admin/opensearch/datasources/read [Get GET API]
+* cluster:admin/opensearch/datasources/update [Update PUT API]
+* cluster:admin/opensearch/datasources/patch [Update PATCH API]
+* cluster:admin/opensearch/datasources/delete [Delete DELETE API]
+
+Only users mapped to roles granting the above actions are authorized to execute the datasource management APIs.
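+For example, to let a user inspect datasource metadata without being able to modify it, you could grant just the read action in a dedicated role. A minimal sketch using the security plugin's role API, as used elsewhere in this document; the role name `datasource_reader` is illustrative:
+
+```bash
+PUT _plugins/_security/api/roles/datasource_reader
+{
+  "cluster_permissions": [
+    "cluster:admin/opensearch/datasources/read"
+  ]
+}
+```
+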
+## Master Key config for encrypting credential information
+
+* When users provide credentials for a data source, the system encrypts and securely stores them in the metadata index. The system uses the "AES/GCM/NoPadding" symmetric encryption algorithm.
+* The master key is a required config; users can set it up by configuring the `plugins.query.datasources.encryption.masterkey` setting in the opensearch.yml file.
+* The master key must be 16, 24, or 32 characters long.
+* Sample Bash script to generate a 24-character master key
+
+```bash
+#!/bin/bash
+# Generate a 24-character key
+master_key=$(openssl rand -hex 12)
+echo "Master Key: $master_key"
+```
+
+* Sample Python script to generate a 24-character master key
+
+```python
+import random
+import string
+
+# Generate a 24-character random master key
+master_key = ''.join(random.choices(string.ascii_letters + string.digits, k=24))
+
+# Print the master key
+print("Generated master key:", master_key)
+
+```
+
+## Datasource URI Hosts Deny Lists Config
+
+* In the OpenSearch configuration file (opensearch.yml), the parameter "plugins.query.datasources.uri.hosts.denylist" can be utilized to control the permitted host IPs within the datasource URI configuration.
+* By default, the value is set to an empty list, which allows any domain to be accepted.
+* For instance, if you set the value to `127.0.0.0/8`, the PPL plugin will deny all query requests where the datasource URI resolves to the IP range from `127.0.0.0` to `127.255.255.255`
+
+## Using a datasource in PPL command
+
+A datasource is referenced in the source command as shown in the code block below.
+Based on the abstraction designed by the connector,
+one can refer to the corresponding entity as a table in the source command.
+For example, in the prometheus connector, each metric is abstracted as a table,
+so we can refer to a metric and apply stats over it in the following way.
+Example source command with prometheus datasource
+
+```ppl ignore
+source = my_prometheus.prometheus_http_requests_total | stats avg(@value) by job;
+```
+
+## Authorization of PPL commands on datasources
+
+In case of secure OpenSearch domains, only admins and users with roles mentioned in the datasource configuration are allowed to make queries.
+For example: with the datasource configuration below, only admins and users with the prometheus_access role can run queries on the my_prometheus datasource.
+
+```bash
+{
+  "name" : "my_prometheus",
+  "connector": "prometheus",
+  "properties" : {
+      "prometheus.uri" : "http://localhost:8080",
+      "prometheus.auth.type" : "basicauth",
+      "prometheus.auth.username" : "admin",
+      "prometheus.auth.password" : "admin"
+  },
+  "allowedRoles" : ["prometheus_access"]
+}
+```
+
+## Moving from keystore datasource configuration
+
+* In versions prior to 2.7, the plugins.query.federation.datasources.config key store setting was used to configure datasources, but it has been deprecated and will be removed in version 3.0.
+* To port previously configured datasources from the keystore, users can use the `create datasource` REST API mentioned in the above section.
+
+## Disabling a datasource to block new queries
+
+* We can disable a datasource using the PATCH or PUT API. Below is an example request for disabling a datasource named "my_prometheus" using the PATCH API.
+
+```bash
+PATCH https://localhost:9200/_plugins/_query/_datasources
+content-type: application/json
+Authorization: Basic {{username}} {{password}}
+
+{
+  "name" : "my_prometheus",
+  "status" : "disabled"
+}
+
+
+```
+
+## Metadata queries using information_schema
+
+Use `information_schema` in the source command to query table information under a datasource.
+In the current state, `information_schema` only supports metadata of tables.
+This schema will be extended for views, columns, and other metadata in the future.
+### Syntax
+
+source = datasource.information_schema.tables;
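+In its simplest form, the command lists every table under a datasource. A minimal sketch, assuming the `my_prometheus` datasource configured earlier:
+
+```ppl ignore
+source = my_prometheus.information_schema.tables
+```
+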
+### Example 1: Fetch tables in prometheus datasource
+
+This example fetches tables in the prometheus datasource.
+PPL query for fetching PROMETHEUS TABLES with a where clause:
+
+```ppl
+source = my_prometheus.information_schema.tables
+| where TABLE_NAME='prometheus_http_requests_total'
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------+--------------+--------------------------------+------------+------+---------------------------+
+| TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME                     | TABLE_TYPE | UNIT | REMARKS                   |
+|---------------+--------------+--------------------------------+------------+------+---------------------------|
+| my_prometheus | default      | prometheus_http_requests_total | counter    |      | Counter of HTTP requests. |
++---------------+--------------+--------------------------------+------------+------+---------------------------+
+```
+
+### Example 2: Search tables in prometheus datasource
+
+This example searches tables in the prometheus datasource.
+PPL query for searching PROMETHEUS TABLES:
+
+```ppl
+source = my_prometheus.information_schema.tables
+| where LIKE(TABLE_NAME, "%http%")
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 6/6
++---------------+--------------+---------------------------------------------+------------+------+-----------------------------------------------------+
+| TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME                                  | TABLE_TYPE | UNIT | REMARKS                                             |
+|---------------+--------------+---------------------------------------------+------------+------+-----------------------------------------------------|
+| my_prometheus | default      | prometheus_http_requests_total              | counter    |      | Counter of HTTP requests.                           |
+| my_prometheus | default      | promhttp_metric_handler_requests_in_flight  | gauge      |      | Current number of scrapes being served.             |
+| my_prometheus | default      | prometheus_http_request_duration_seconds    | histogram  |      | Histogram of latencies for HTTP requests.           |
+| my_prometheus | default      | prometheus_sd_http_failures_total           | counter    |      | Number of HTTP service discovery refresh failures.  |
+| my_prometheus | default      | promhttp_metric_handler_requests_total      | counter    |      | Total number of scrapes by HTTP status code.        |
+| my_prometheus | default      | prometheus_http_response_size_bytes         | histogram  |      | Histogram of response size for HTTP requests.       |
++---------------+--------------+---------------------------------------------+------------+------+-----------------------------------------------------+
+```
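+
+Other columns can be filtered the same way in a `where` clause. For instance, a hedged sketch that keeps only counter metrics, again assuming the `my_prometheus` datasource:
+
+```ppl ignore
+source = my_prometheus.information_schema.tables
+| where TABLE_TYPE = 'counter'
+```
+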
+## Fetch metadata for a table in Prometheus datasource
+
+After a Prometheus datasource is configured, you can inspect the schema of any metric by running the `describe` command against the fully qualified table name. For example:
+
+```ppl
+describe my_prometheus.prometheus_http_requests_total
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 6/6
++---------------+--------------+--------------------------------+-------------+-----------+
+| TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME                     | COLUMN_NAME | DATA_TYPE |
+|---------------+--------------+--------------------------------+-------------+-----------|
+| my_prometheus | default      | prometheus_http_requests_total | handler     | string    |
+| my_prometheus | default      | prometheus_http_requests_total | code        | string    |
+| my_prometheus | default      | prometheus_http_requests_total | instance    | string    |
+| my_prometheus | default      | prometheus_http_requests_total | @timestamp  | timestamp |
+| my_prometheus | default      | prometheus_http_requests_total | @value      | double    |
+| my_prometheus | default      | prometheus_http_requests_total | job         | string    |
++---------------+--------------+--------------------------------+-------------+-----------+
+```
+
+## Limitations
+
+When using PPL, data sources other than OpenSearch can only work with `plugins.calcite.enabled=false`.
+When Calcite is enabled, queries against non-OpenSearch data sources implicitly fall back to v2, which means new PPL commands/functions introduced in 3.0.0 and above cannot be used with non-OpenSearch data sources.
\ No newline at end of file
diff --git a/docs/user/ppl/admin/datasources.rst b/docs/user/ppl/admin/datasources.rst
deleted file mode 100644
index c5f9adfd85a..00000000000
--- a/docs/user/ppl/admin/datasources.rst
+++ /dev/null
@@ -1,290 +0,0 @@
-.. highlight:: sh
-
-===================
-Datasource Settings
-===================
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 1
-
-Introduction
-============
-
-The concept of ``datasource`` is introduced to support the federation of SQL/PPL query engine to multiple data stores.
-This helps PPL users to leverage data from multiple data stores and derive correlation and insights.
-Datasource definition provides the information to connect to a data store and also gives a name to them to refer in PPL commands.
-
-Refer below sections for quick setup.
-
-* `Datasource configuration APIs`_
-* `Master Key config for encrypting credential information`_
-
-
-Definitions of datasource and connector
-=======================================
-* Connector is a component that adapts the query engine to a datastore. For example, Prometheus connector would adapt and help execute the queries to run on Prometheus datastore. connector name is enough in the datasource definition json.
-* Datasource is a construct to define how to connect to a data store and which connector to adapt by query engine.
-
-Example Prometheus Datasource Definition ::
-
-    {
-      "name" : "my_prometheus",
-      "connector": "prometheus",
-      "properties" : {
-          "prometheus.uri" : "http://localhost:8080",
-          "prometheus.auth.type" : "basicauth",
-          "prometheus.auth.username" : "admin",
-          "prometheus.auth.password" : "admin"
-      },
-      "allowedRoles" : ["prometheus_access"],
-      "status" : "ACTIVE|DISABLED"
-    }
-Datasource configuration Restrictions.
-
-* ``name``, ``connector``, ``properties`` are required fields in the datasource configuration.
-* In case of secure domains, ``allowedRoles`` can be used to specify the opensearch roles allowed to access the datasource via PPL/SQL.
-* If ``allowedRoles`` are not specified for a datasource, only users with ``all_access`` could access the datasource in case of secure domains.
-* In case of security disabled domains, authorization is disbaled. -* All the datasource names should be unique and match the following regex[``[@*A-Za-z]+?[*a-zA-Z_\-0-9]*``]. -* Allowed Connectors. - * ``prometheus`` [More details: `Prometheus Connector `_] -* All the allowed config parameters in ``properties`` are defined in individual connector pages mentioned above. -* From version 2.13, we have introduced a new optional field ``status`` which can be used to enable and disable a datasource.When a datasource is disabled, it blocks new queries, resulting in 400 errors for any attempts made on it. By default when a datasource is created, status is ACTIVE. - - -Datasource configuration APIs -============================= -Datasource configuration can be managed using below REST APIs. All the examples below are for OpenSearch domains enabled with secure domain. -we can remove authorization and other details in case of security disabled domains. - -* Datasource Creation POST API ("_plugins/_query/_datasources") :: - - POST https://localhost:9200/_plugins/_query/_datasources - content-type: application/json - Authorization: Basic {{username}} {{password}} - - { - "name" : "my_prometheus", - "connector": "prometheus", - "properties" : { - "prometheus.uri" : "http://localhost:8080", - "prometheus.auth.type" : "basicauth", - "prometheus.auth.username" : "admin", - "prometheus.auth.password" : "admin" - }, - "allowedRoles" : ["prometheus_access"] - } - -* Datasource modification PUT API ("_plugins/_query/_datasources") :: - - PUT https://localhost:9200/_plugins/_query/_datasources - content-type: application/json - Authorization: Basic {{username}} {{password}} - - { - "name" : "my_prometheus", - "connector": "prometheus", - "properties" : { - "prometheus.uri" : "http://localhost:8080", - "prometheus.auth.type" : "basicauth", - "prometheus.auth.username" : "admin", - "prometheus.auth.password" : "admin" - }, - "allowedRoles" : ["prometheus_access"] - } - -* Datasource modification PATCH API ("_plugins/_query/_datasources") :: - - PATCH https://localhost:9200/_plugins/_query/_datasources - content-type: application/json - Authorization: Basic {{username}} {{password}} - - { - "name" : "my_prometheus", - "allowedRoles" : ["all_access"] - } - - **Name is required and must exist. Connector cannot be modified and will be ignored.** - -* Datasource Read GET API("_plugins/_query/_datasources/{{dataSourceName}}" :: - - GET https://localhost:9200/_plugins/_query/_datasources/my_prometheus - content-type: application/json - Authorization: Basic {{username}} {{password}} - - **Authentication Information won't be vended out in GET API's response.** - -* Datasource Deletion DELETE API("_plugins/_query/_datasources/{{dataSourceName}}") :: - - DELETE https://localhost:9200/_plugins/_query/_datasources/my_prometheus - content-type: application/json - Authorization: Basic {{username}} {{password}} - -Authorization of datasource configuration APIs -============================================== -Each of the datasource configuration management apis are controlled by following actions respectively. 
- -* cluster:admin/opensearch/datasources/create [Create POST API] -* cluster:admin/opensearch/datasources/read [Get GET API] -* cluster:admin/opensearch/datasources/update [Update PUT API] -* cluster:admin/opensearch/datasources/patch [Update PATCH API] -* cluster:admin/opensearch/datasources/delete [Delete DELETE API] - -Only users mapped with roles having above actions are authorized to execute datasource management apis. - -Master Key config for encrypting credential information -======================================================== -* When users provide credentials for a data source, the system encrypts and securely stores them in the metadata index. System uses "AES/GCM/NoPadding" symmetric encryption algorithm. -* Master key is a required config and users can set this up by configuring the `plugins.query.datasources.encryption.masterkey` setting in the opensearch.yml file. -* The master key must be 16, 24, or 32 characters long. -* Sample Bash Script to generate a 24 character master key :: - - #!/bin/bash - # Generate a 24-character key - master_key=$(openssl rand -hex 12) - echo "Master Key: $master_key" -* Sample python script to generate a 24 character master key :: - - import random - import string - - # Generate a 24-character random master key - master_key = ''.join(random.choices(string.ascii_letters + string.digits, k=24)) - - # Print the master key - print("Generated master key:", master_key) - -Datasource URI Hosts Deny Lists Config -====================================== -* In the OpenSearch configuration file (opensearch.yml), the parameter "plugins.query.datasources.uri.hosts.denylist" can be utilized to control the permitted host ips within the datasource URI configuration. -* By default, the value is set to empty list, which allows any domain to be accepted. -* For instance, if you set the value to `127.0.0.0/8`, ppl plugins will deny all the query requests where the datasource URI resolves to the ip range from `127.0.0.0` to `127.255.255.255` - - -Using a datasource in PPL command -================================= -Datasource is referred in source command as show in the code block below. -Based on the abstraction designed by the connector, -one can refer the corresponding entity as table in the source command. -For example in prometheus connector, each metric is abstracted as a table. -so we can refer a metric and apply stats over it in the following way. - -Example source command with prometheus datasource :: - - >> source = my_prometheus.prometheus_http_requests_total | stats avg(@value) by job; - - -Authorization of PPL commands on datasources -============================================ -In case of secure opensearch domains, only admins and users with roles mentioned in datasource configuration are allowed to make queries. -For example: with below datasource configuration, only admins and users with prometheus_access role can run queries on my_prometheus datasource. :: - - { - "name" : "my_prometheus", - "connector": "prometheus", - "properties" : { - "prometheus.uri" : "http://localhost:8080", - "prometheus.auth.type" : "basicauth", - "prometheus.auth.username" : "admin", - "prometheus.auth.password" : "admin" - }, - "allowedRoles" : ["prometheus_access"] - } - - -Moving from keystore datasource configuration -============================================= -* In versions prior to 2.7, the plugins.query.federation.datasources.config key store setting was used to configure datasources, but it has been deprecated and will be removed in version 3.0. 
-* To port previously configured datasources from the keystore, users can use the `create datasource` REST API mentioned in the above section. - -Disabling a datasource to block new queries -=========================================== -* We can disable a datasource using PATCH or PUT API. Below is the example request for disabling a datasource named "my_prometheus" using PATCH API. :: - - PATCH https://localhost:9200/_plugins/_query/_datasources - content-type: application/json - Authorization: Basic {{username}} {{password}} - - { - "name" : "my_prometheus", - "status" : "disabled" - } - - -Metadata queries using information_schema -========================================= -Use ``information_schema`` in source command to query tables information under a datasource. -In the current state, ``information_schema`` only support metadata of tables. -This schema will be extended for views, columns and other metadata info in future. - -Syntax ------- -source = datasource.information_schema.tables; - -Example 1: Fetch tables in prometheus datasource ------------------------------------------------- - -The examples fetches tables in the prometheus datasource. - -PPL query for fetching PROMETHEUS TABLES with where clause:: - - PPL> source = my_prometheus.information_schema.tables | where TABLE_NAME='prometheus_http_requests_total' - fetched rows / total rows = 1/1 - +---------------+--------------+--------------------------------+------------+------+---------------------------+ - | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | TABLE_TYPE | UNIT | REMARKS | - |---------------+--------------+--------------------------------+------------+------+---------------------------| - | my_prometheus | default | prometheus_http_requests_total | counter | | Counter of HTTP requests. | - +---------------+--------------+--------------------------------+------------+------+---------------------------+ - - -Example 2: Search tables in prometheus datasource -------------------------------------------------- - -The examples searches tables in the prometheus datasource. - -PPL query for searching PROMETHEUS TABLES:: - - PPL> source = my_prometheus.information_schema.tables | where LIKE(TABLE_NAME, "%http%"); - fetched rows / total rows = 6/6 - +---------------+--------------+--------------------------------------------+------------+------+----------------------------------------------------+ - | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | TABLE_TYPE | UNIT | REMARKS | - |---------------+--------------+--------------------------------------------+------------+------+----------------------------------------------------| - | my_prometheus | default | prometheus_http_requests_total | counter | | Counter of HTTP requests. | - | my_prometheus | default | promhttp_metric_handler_requests_in_flight | gauge | | Current number of scrapes being served. | - | my_prometheus | default | prometheus_http_request_duration_seconds | histogram | | Histogram of latencies for HTTP requests. | - | my_prometheus | default | prometheus_sd_http_failures_total | counter | | Number of HTTP service discovery refresh failures. | - | my_prometheus | default | promhttp_metric_handler_requests_total | counter | | Total number of scrapes by HTTP status code. | - | my_prometheus | default | prometheus_http_response_size_bytes | histogram | | Histogram of response size for HTTP requests. | - +---------------+--------------+--------------------------------------------+------------+------+----------------------------------------------------+ - - -.. 
_datasources-prometheus-metadata: - -Fetch metadata for table in Prometheus datasource -================================================= - -After a Prometheus datasource is configured, you can inspect the schema of any metric by running the ``describe`` command against the fully qualified table name. For example:: - -PPL query:: - - PPL> describe my_prometheus.prometheus_http_requests_total; - fetched rows / total rows = 6/6 - +---------------+--------------+--------------------------------+-------------+-----------+ - | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | COLUMN_NAME | DATA_TYPE | - |---------------+--------------+--------------------------------+-------------+-----------| - | my_prometheus | default | prometheus_http_requests_total | handler | string | - | my_prometheus | default | prometheus_http_requests_total | code | string | - | my_prometheus | default | prometheus_http_requests_total | instance | string | - | my_prometheus | default | prometheus_http_requests_total | @timestamp | timestamp | - | my_prometheus | default | prometheus_http_requests_total | @value | double | - | my_prometheus | default | prometheus_http_requests_total | job | string | - +---------------+--------------+--------------------------------+-------------+-----------+ - -Limitations -=========== - -In using PPL, data sources except OpenSearch can only work with ``plugins.calcite.enabled=false``. -When Calcite is enabled, queries against non-OpenSearch data sources will implicit fallback to v2, which means new PPL commands/functions introduced in 3.0.0 and above cannot work together with non-OpenSearch data sources. diff --git a/docs/user/ppl/admin/monitoring.md b/docs/user/ppl/admin/monitoring.md new file mode 100644 index 00000000000..7a85a0bac01 --- /dev/null +++ b/docs/user/ppl/admin/monitoring.md @@ -0,0 +1,35 @@ +# Monitoring + +## Introduction + +By a stats endpoint, you are able to collect metrics for the plugin within the interval. Note that only node level statistics collecting is implemented for now. In other words, you only get the metrics for the node you're accessing. Cluster level statistics have yet to be implemented. + +## Node Stats + +### Description + +The meaning of fields in the response is as follows: + +| Field name | Description | +|------------|-------------| +| `ppl_request_total` | Total count of PPL request | +| `ppl_request_count` | Total count of PPL request within the interval | +| `ppl_failed_request_count_syserr` | Count of failed PPL request due to system error within the interval | +| `ppl_failed_request_count_cuserr` | Count of failed PPL request due to bad request within the interval | + +### Example + +```bash ignore +curl -H 'Content-Type: application/json' -X GET localhost:9200/_plugins/_ppl/stats +``` + +```json +{ + "ppl_request_total": 10, + "ppl_request_count": 2, + "ppl_failed_request_count_syserr": 0, + "ppl_failed_request_count_cuserr": 0, + ... +} +``` + \ No newline at end of file diff --git a/docs/user/ppl/admin/monitoring.rst b/docs/user/ppl/admin/monitoring.rst deleted file mode 100644 index 625b0411c49..00000000000 --- a/docs/user/ppl/admin/monitoring.rst +++ /dev/null @@ -1,56 +0,0 @@ -.. highlight:: sh - -========== -Monitoring -========== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -Introduction -============ - -By a stats endpoint, you are able to collect metrics for the plugin within the interval. Note that only node level statistics collecting is implemented for now. 
In other words, you only get the metrics for the node you're accessing. Cluster level statistics have yet to be implemented. - -Node Stats -========== - -Description ------------ - -The meaning of fields in the response is as follows: - -+--------------------------------+-------------------------------------------------------------------+ -| Field name| Description| -+================================+===================================================================+ -| ppl_request_total| Total count of PPL request| -+--------------------------------+-------------------------------------------------------------------+ -| ppl_request_count| Total count of PPL request within the interval| -+--------------------------------+-------------------------------------------------------------------+ -| ppl_failed_request_count_syserr|Count of failed PPL request due to system error within the interval| -+--------------------------------+-------------------------------------------------------------------+ -| ppl_failed_request_count_cuserr| Count of failed PPL request due to bad request within the interval| -+--------------------------------+-------------------------------------------------------------------+ - - -Example -------- - -SQL query:: - - >> curl -H 'Content-Type: application/json' -X GET localhost:9200/_plugins/_ppl/stats - -Result set:: - - { - "ppl_request_total": 10, - "ppl_request_count": 2, - "ppl_failed_request_count_syserr": 0, - "ppl_failed_request_count_cuserr": 0, - ... - } - diff --git a/docs/user/ppl/admin/security.md b/docs/user/ppl/admin/security.md new file mode 100644 index 00000000000..d41f3b23e23 --- /dev/null +++ b/docs/user/ppl/admin/security.md @@ -0,0 +1,65 @@ +# Security Settings + +## Introduction + +User needs `cluster:admin/opensearch/ppl` permission to use PPL plugin. User also needs indices level permission `indices:admin/mappings/get` to get field mappings, `indices:monitor/settings/get` to get cluster settings, and `indices:data/read/search*` to search index. +## Using Rest API + +**--INTRODUCED 2.1--** + +Example: Create the ppl_role for test_user. then test_user could use PPL to query `ppl-security-demo` index. +1. Create the ppl_role and grant permission to access PPL plugin and access ppl-security-demo index + +```bash +PUT _plugins/_security/api/roles/ppl_role +{ + "cluster_permissions": [ + "cluster:admin/opensearch/ppl" + ], + "index_permissions": [{ + "index_patterns": [ + "ppl-security-demo" + ], + "allowed_actions": [ + "indices:data/read/search*", + "indices:admin/mappings/get", + "indices:monitor/settings/get" + ] + }] +} + +``` + +2. Mapping the test_user to the ppl_role + +```bash +PUT _plugins/_security/api/rolesmapping/ppl_role +{ + "backend_roles" : [], + "hosts" : [], + "users" : ["test_user"] +} + + +``` + +## Using Security Dashboard + +**--INTRODUCED 2.1--** + +Example: Create ppl_access permission and add to existing role +1. Create the ppl_access permission + +```bash +PUT _plugins/_security/api/actiongroups/ppl_access +{ + "allowed_actions": [ + "cluster:admin/opensearch/ppl" + ] +} + +``` + +2. Grant the ppl_access permission to ppl_test_role + +![Image](https://user-images.githubusercontent.com/2969395/185448976-6c0aed6b-7540-4b99-92c3-362da8ae3763.png) diff --git a/docs/user/ppl/admin/security.rst b/docs/user/ppl/admin/security.rst deleted file mode 100644 index e512cc259c2..00000000000 --- a/docs/user/ppl/admin/security.rst +++ /dev/null @@ -1,70 +0,0 @@ -.. 
highlight:: sh
-
-=================
-Security Settings
-=================
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 1
-
-Introduction
-============
-
-User needs ``cluster:admin/opensearch/ppl`` permission to use PPL plugin. User also needs indices level permission ``indices:admin/mappings/get`` to get field mappings, ``indices:monitor/settings/get`` to get cluster settings, and ``indices:data/read/search*`` to search index.
-
-Using Rest API
-==============
-**--INTRODUCED 2.1--**
-
-Example: Create the ppl_role for test_user. then test_user could use PPL to query ``ppl-security-demo`` index.
-
-1. Create the ppl_role and grand permission to access PPL plugin and access ppl-security-demo index::
-
-   PUT _plugins/_security/api/roles/ppl_role
-   {
-     "cluster_permissions": [
-       "cluster:admin/opensearch/ppl"
-     ],
-     "index_permissions": [{
-       "index_patterns": [
-         "ppl-security-demo"
-       ],
-       "allowed_actions": [
-         "indices:data/read/search*",
-         "indices:admin/mappings/get",
-         "indices:monitor/settings/get"
-       ]
-     }]
-   }
-
-2. Mapping the test_user to the ppl_role::
-
-   PUT _plugins/_security/api/rolesmapping/ppl_role
-   {
-     "backend_roles" : [],
-     "hosts" : [],
-     "users" : ["test_user"]
-   }
-
-
-Using Security Dashboard
-========================
-**--INTRODUCED 2.1--**
-
-Example: Create ppl_access permission and add to existing role
-
-1. Create the ppl_access permission::
-
-   PUT _plugins/_security/api/actiongroups/ppl_access
-   {
-     "allowed_actions": [
-       "cluster:admin/opensearch/ppl"
-     ]
-   }
-
-2. Grant the ppl_access permission to ppl_test_role
-
-.. image:: https://user-images.githubusercontent.com/2969395/185448976-6c0aed6b-7540-4b99-92c3-362da8ae3763.png
diff --git a/docs/user/ppl/admin/settings.md b/docs/user/ppl/admin/settings.md
new file mode 100644
index 00000000000..e521ab6c11f
--- /dev/null
+++ b/docs/user/ppl/admin/settings.md
@@ -0,0 +1,441 @@
+# PPL Settings
+
+## Introduction
+
+When OpenSearch bootstraps, the PPL plugin registers a few settings in the OpenSearch cluster settings. Most of the settings can be changed dynamically, so you can control the behavior of the PPL plugin without needing to bounce your cluster.
+## plugins.ppl.enabled
+
+### Description
+
+You can disable the PPL plugin to reject all incoming requests.
+1. The default value is true.
+2. This setting is node scope.
+3. This setting can be updated dynamically.
+
+Note: Calls to _plugins/_ppl include index names in the request body, so they have the same access policy considerations as the bulk, mget, and msearch operations. If rest.action.multi.allow_explicit_index is set to false, the PPL plugin will be disabled.
+### Example 1
+
+You can update the setting with a new value like this.
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_plugins/_query/settings \
+-d '{"transient" : {"plugins.ppl.enabled" : "false"}}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {},
+  "transient": {
+    "plugins": {
+      "ppl": {
+        "enabled": "false"
+      }
+    }
+  }
+}

+```
+
+Note: the legacy setting `opendistro.ppl.enabled` is deprecated; it will fall back to the new setting if you request an update with the legacy name.
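+
+To confirm the current value, you can read the setting back from the cluster settings API. A sketch; the `flat_settings` flag and `grep` are just one convenient way to trim the response:
+
+```bash ignore
+curl -sS -X GET 'localhost:9200/_cluster/settings?include_defaults=true&flat_settings=true' | grep 'plugins.ppl.enabled'
+```
+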
+### Example 2
+
+The query result after the setting is updated looks like:
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X POST localhost:9200/_plugins/_ppl \
+-d '{"query": "source=my_prometheus"}'
+```
+
+Expected output:
+
+```json
+{
+  "error": {
+    "reason": "Invalid Query",
+    "details": "Either plugins.ppl.enabled or rest.action.multi.allow_explicit_index setting is false",
+    "type": "IllegalAccessException"
+  },
+  "status": 400
+}

+```
+
+## plugins.ppl.query.timeout
+
+### Description
+
+This setting controls the maximum execution time for PPL queries. When a query exceeds this timeout, it will be interrupted and return a timeout error.
+1. The default value is 300s (5 minutes).
+2. This setting is node scope.
+3. This setting can be updated dynamically.
+
+### Example
+
+You can configure the query timeout:
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_plugins/_query/settings \
+-d '{"transient" : {"plugins.ppl.query.timeout" : "60s"}}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {},
+  "transient": {
+    "plugins": {
+      "ppl": {
+        "query": {
+          "timeout": "60s"
+        }
+      }
+    }
+  }
+}
+```
+
+## plugins.query.memory_limit
+
+### Description
+
+You can set a heap memory usage limit for the query engine. While a query is running, the engine checks whether heap memory usage is under the limit; if not, it terminates the current query. The default value is: 85%
+### Example
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_plugins/_query/settings \
+-d '{"persistent" : {"plugins.query.memory_limit" : "80%"}}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {
+    "plugins": {
+      "query": {
+        "memory_limit": "80%"
+      }
+    }
+  },
+  "transient": {}
+}

+```
+
+Note: the legacy setting `opendistro.ppl.query.memory_limit` is deprecated; it will fall back to the new setting if you request an update with the legacy name.
+## plugins.query.size_limit
+
+### Description
+
+The size configures the maximum number of rows to be fetched from PPL execution results. The default value is: 10000
+### Example
+
+Change the size_limit to 1000
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_plugins/_query/settings \
+-d '{"persistent" : {"plugins.query.size_limit" : "1000"}}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {
+    "plugins": {
+      "query": {
+        "size_limit": "1000"
+      }
+    }
+  },
+  "transient": {}
+}
+```
+
+Note: the legacy setting `opendistro.query.size_limit` is deprecated; it will fall back to the new setting if you request an update with the legacy name.
+## plugins.query.buckets
+
+### Version
+
+3.4.0
+### Description
+
+This configuration indicates how many aggregation buckets can be returned in a single response. The default value equals `plugins.query.size_limit`.
+You can change the value to any value not greater than the maximum number of aggregation buckets allowed in a single response (`search.max_buckets`); here is an example:
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{
+  "transient" : {
+    "plugins.query.buckets" : 1000
+  }
+}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {},
+  "transient": {
+    "plugins": {
+      "query": {
+        "buckets": "1000"
+      }
+    }
+  }
+}
+```
+
+### Limitations
+
+The number of aggregation buckets is fixed to `1000` in v2. `plugins.query.buckets` only affects the number of aggregation buckets when Calcite is enabled.
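+
+As noted in the description, the value is capped by the cluster-level `search.max_buckets` setting. If you need more buckets than that cap allows, raise the ceiling first; a sketch, with `20000` as an arbitrary illustrative value:
+
+```bash ignore
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_cluster/settings \
+-d '{"transient" : {"search.max_buckets" : 20000}}'
+```
+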
+## plugins.calcite.all_join_types.allowed
+
+### Description
+
+Since 3.3.0, join types `inner`, `left`, `outer` (alias of `left`), `semi` and `anti` are supported by default. `right`, `full`, `cross` are performance-sensitive join types which are disabled by default. Set config `plugins.calcite.all_join_types.allowed = true` to enable them.
+### Example
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_plugins/_query/settings \
+-d '{"transient" : {"plugins.calcite.all_join_types.allowed" : "true"}}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {},
+  "transient": {
+    "plugins": {
+      "calcite": {
+        "all_join_types": {
+          "allowed": "true"
+        }
+      }
+    }
+  }
+}
+```
+
+## plugins.ppl.syntax.legacy.preferred
+
+### Description
+
+This configuration, introduced in 3.3.0, is used to switch some behaviours in PPL syntax. The current default value is `true`.
+The behaviours it controls include:
+- The default value of argument `bucket_nullable` in `stats` command. Check [stats command](../cmd/stats.md) for details.
+- The return value of `divide` and `/` operator. Check [expressions](../functions/expressions.md) for details.
+- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
+
+### Example 1
+
+You can update the setting with a new value like this.
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_plugins/_query/settings \
+-d '{"transient" : {"plugins.ppl.syntax.legacy.preferred" : "false"}}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {},
+  "transient": {
+    "plugins": {
+      "ppl": {
+        "syntax": {
+          "legacy": {
+            "preferred": "false"
+          }
+        }
+      }
+    }
+  }
+}

+```
+
+### Example 2
+
+Reset to default (true) by setting to null:
+
+```bash ppl
+curl -sS -H 'Content-Type: application/json' \
+-X PUT localhost:9200/_plugins/_query/settings \
+-d '{"transient" : {"plugins.ppl.syntax.legacy.preferred" : null}}'
+```
+
+Expected output:
+
+```json
+{
+  "acknowledged": true,
+  "persistent": {},
+  "transient": {}
+}
+```
+
+## plugins.ppl.values.max.limit
+
+### Description
+
+This setting controls the maximum number of unique values that the `VALUES` aggregation function can return. When set to 0 (the default), there is no limit on the number of unique values returned. When set to a positive integer, the function will return at most that many unique values.
+1. The default value is 0 (unlimited).
+2. This setting is node scope.
+3. This setting can be updated dynamically.
+
+The `VALUES` function collects all unique values from a field and returns them in lexicographical order. This setting helps manage memory usage by limiting the number of values collected.
+### Example 1 + +Set the limit to 1000 unique values: + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X PUT localhost:9200/_plugins/_query/settings \ +-d '{"transient" : {"plugins.ppl.values.max.limit" : "1000"}}' +``` + +Expected output: + +```json +{ + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "ppl": { + "values": { + "max": { + "limit": "1000" + } + } + } + } + } +} + +``` + +### Example 2 + +Set to 0 explicitly for unlimited values: + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X PUT localhost:9200/_plugins/_query/settings \ +-d '{"transient" : {"plugins.ppl.values.max.limit" : "0"}}' +``` + +Expected output: + +```json +{ + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "ppl": { + "values": { + "max": { + "limit": "0" + } + } + } + } + } +} + + +``` + +## plugins.ppl.subsearch.maxout + +### Description + +The size configures the maximum of rows to return from subsearch. The default value is: `10000`. A value of `0` indicates that the restriction is unlimited. +### Version + +3.4.0 +### Example + +Change the subsearch.maxout to unlimited + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X PUT localhost:9200/_plugins/_query/settings \ +-d '{"persistent" : {"plugins.ppl.subsearch.maxout" : "0"}}' +``` + +Expected output: + +```json +{ + "acknowledged": true, + "persistent": { + "plugins": { + "ppl": { + "subsearch": { + "maxout": "0" + } + } + } + }, + "transient": {} +} +``` + +## plugins.ppl.join.subsearch_maxout + +### Description + +The size configures the maximum of rows from subsearch to join against. This configuration impacts `join` command. The default value is: `50000`. A value of `0` indicates that the restriction is unlimited. +### Version + +3.4.0 +### Example + +Change the join.subsearch_maxout to 5000 + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X PUT localhost:9200/_plugins/_query/settings \ +-d '{"persistent" : {"plugins.ppl.join.subsearch_maxout" : "5000"}}' +``` + +Expected output: + +```json +{ + "acknowledged": true, + "persistent": { + "plugins": { + "ppl": { + "join": { + "subsearch_maxout": "5000" + } + } + } + }, + "transient": {} +} +``` + \ No newline at end of file diff --git a/docs/user/ppl/admin/settings.rst b/docs/user/ppl/admin/settings.rst deleted file mode 100644 index ef9eba207fa..00000000000 --- a/docs/user/ppl/admin/settings.rst +++ /dev/null @@ -1,427 +0,0 @@ -.. highlight:: sh - -============ -PPL Settings -============ - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -Introduction -============ - -When OpenSearch bootstraps, PPL plugin will register a few settings in OpenSearch cluster settings. Most of the settings are able to change dynamically so you can control the behavior of PPL plugin without need to bounce your cluster. - -plugins.ppl.enabled -=================== - -Description ------------ - -You can disable SQL plugin to reject all coming requests. - -1. The default value is true. -2. This setting is node scope. -3. This setting can be updated dynamically. - -Notes. Calls to _plugins/_ppl include index names in the request body, so they have the same access policy considerations as the bulk, mget, and msearch operations. if rest.action.multi.allow_explicit_index set to false, PPL plugin will be disabled. - -Example 1 ---------- - -You can update the setting with a new value like this. - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... 
-X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"transient" : {"plugins.ppl.enabled" : "false"}}' - { - "acknowledged": true, - "persistent": {}, - "transient": { - "plugins": { - "ppl": { - "enabled": "false" - } - } - } - } - -Note: the legacy settings of ``opendistro.ppl.enabled`` is deprecated, it will fallback to the new settings if you request an update with the legacy name. - -Example 2 ---------- - -Query result after the setting updated is like: - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl \ - ... -d '{"query": "source=my_prometheus"}' - { - "error": { - "reason": "Invalid Query", - "details": "Either plugins.ppl.enabled or rest.action.multi.allow_explicit_index setting is false", - "type": "IllegalAccessException" - }, - "status": 400 - } - -plugins.ppl.query.timeout -========================= - -Description ------------ - -This setting controls the maximum execution time for PPL queries. When a query exceeds this timeout, it will be interrupted and return a timeout error. - -1. The default value is 300s (5 minutes). -2. This setting is node scope. -3. This setting can be updated dynamically. - -Example -------- - -You can configure the query timeout: - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"transient" : {"plugins.ppl.query.timeout" : "60s"}}' - { - "acknowledged": true, - "persistent": {}, - "transient": { - "plugins": { - "ppl": { - "query": { - "timeout": "60s" - } - } - } - } - } - -plugins.query.memory_limit -========================== - -Description ------------ - -You can set heap memory usage limit for the query engine. When query running, it will detected whether the heap memory usage under the limit, if not, it will terminated the current query. The default value is: 85% - -Example -------- - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"persistent" : {"plugins.query.memory_limit" : "80%"}}' - { - "acknowledged": true, - "persistent": { - "plugins": { - "query": { - "memory_limit": "80%" - } - } - }, - "transient": {} - } - -Note: the legacy settings of ``opendistro.ppl.query.memory_limit`` is deprecated, it will fallback to the new settings if you request an update with the legacy name. - -plugins.query.size_limit -======================== - -Description ------------ - -The size configures the maximum amount of rows to be fetched from PPL execution results. The default value is: 10000 - -Example -------- - -Change the size_limit to 1000:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"persistent" : {"plugins.query.size_limit" : "1000"}}' - { - "acknowledged": true, - "persistent": { - "plugins": { - "query": { - "size_limit": "1000" - } - } - }, - "transient": {} - } - -Note: the legacy settings of ``opendistro.query.size_limit`` is deprecated, it will fallback to the new settings if you request an update with the legacy name. - -plugins.query.buckets -===================== - -Version -------- -3.4.0 - -Description ------------ - -This configuration indicates how many aggregation buckets will return in a single response. The default value equals to ``plugins.query.size_limit``. 
-You can change the value to any value not greater than the maximum number of aggregation buckets allowed in a single response (`search.max_buckets`), here is an example:: - - >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ - "transient" : { - "plugins.query.buckets" : 1000 - } - }' - -Result set:: - - { - "acknowledged" : true, - "persistent" : { }, - "transient" : { - "plugins" : { - "query" : { - "buckets" : "1000" - } - } - } - } - -Limitations ------------ -The number of aggregation buckets is fixed to ``1000`` in v2. ``plugins.query.buckets`` can only effect the number of aggregation buckets when calcite enabled. - -plugins.calcite.all_join_types.allowed -====================================== - -Description ------------ - -Since 3.3.0, join types ``inner``, ``left``, ``outer`` (alias of ``left``), ``semi`` and ``anti`` are supported by default. ``right``, ``full``, ``cross`` are performance sensitive join types which are disabled by default. Set config ``plugins.calcite.all_join_types.allowed = true`` to enable. - -Example -------- - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"transient" : {"plugins.calcite.all_join_types.allowed" : "true"}}' - { - "acknowledged": true, - "persistent": {}, - "transient": { - "plugins": { - "calcite": { - "all_join_types": { - "allowed": "true" - } - } - } - } - } - -plugins.ppl.syntax.legacy.preferred -=================================== - -Description ------------ - -This configuration is introduced since 3.3.0 which is used to switch some behaviours in PPL syntax. The current default value is ``true``. -The behaviours it controlled includes: - -- The default value of argument ``bucket_nullable`` in ``stats`` command. Check `stats command <../cmd/stats.rst>`_ for details. -- The return value of ``divide`` and ``/`` operator. Check `expressions <../functions/expressions.rst>`_ for details. -- The default value of argument ``usenull`` in ``top`` and ``rare`` commands. Check `top command <../cmd/top.rst>`_ and `rare command <../cmd/rare.rst>`_ for details. - -Example 1 -------- - -You can update the setting with a new value like this. - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"transient" : {"plugins.ppl.syntax.legacy.preferred" : "false"}}' - { - "acknowledged": true, - "persistent": {}, - "transient": { - "plugins": { - "ppl": { - "syntax": { - "legacy": { - "preferred": "false" - } - } - } - } - } - } - -Example 2 ---------- - -Reset to default (true) by setting to null: - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"transient" : {"plugins.ppl.syntax.legacy.preferred" : null}}' - { - "acknowledged": true, - "persistent": {}, - "transient": {} - } - -plugins.ppl.values.max.limit -============================ - -Description ------------ - -This setting controls the maximum number of unique values that the ``VALUES`` aggregation function can return. When set to 0 (the default), there is no limit on the number of unique values returned. When set to a positive integer, the function will return at most that many unique values. - -1. The default value is 0 (unlimited). -2. This setting is node scope. -3. This setting can be updated dynamically. 
- -The ``VALUES`` function collects all unique values from a field and returns them in lexicographical order. This setting helps manage memory usage by limiting the number of values collected. - -Example 1 ---------- - -Set the limit to 1000 unique values: - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"transient" : {"plugins.ppl.values.max.limit" : "1000"}}' - { - "acknowledged": true, - "persistent": {}, - "transient": { - "plugins": { - "ppl": { - "values": { - "max": { - "limit": "1000" - } - } - } - } - } - } - -Example 2 ---------- - -Set to 0 explicitly for unlimited values: - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"transient" : {"plugins.ppl.values.max.limit" : "0"}}' - { - "acknowledged": true, - "persistent": {}, - "transient": { - "plugins": { - "ppl": { - "values": { - "max": { - "limit": "0" - } - } - } - } - } - } - - -plugins.ppl.subsearch.maxout -============================ - -Description ------------ - -The size configures the maximum of rows to return from subsearch. The default value is: ``10000``. A value of ``0`` indicates that the restriction is unlimited. - -Version -------- -3.4.0 - -Example -------- - -Change the subsearch.maxout to unlimited:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"persistent" : {"plugins.ppl.subsearch.maxout" : "0"}}' - { - "acknowledged": true, - "persistent": { - "plugins": { - "ppl": { - "subsearch": { - "maxout": "0" - } - } - } - }, - "transient": {} - } - -plugins.ppl.join.subsearch_maxout -================================= - -Description ------------ - -The size configures the maximum of rows from subsearch to join against. This configuration impacts ``join`` command. The default value is: ``50000``. A value of ``0`` indicates that the restriction is unlimited. - -Version -------- -3.4.0 - -Example -------- - -Change the join.subsearch_maxout to 5000:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"persistent" : {"plugins.ppl.join.subsearch_maxout" : "5000"}}' - { - "acknowledged": true, - "persistent": { - "plugins": { - "ppl": { - "join": { - "subsearch_maxout": "5000" - } - } - } - }, - "transient": {} - } diff --git a/docs/user/ppl/cmd/ad.md b/docs/user/ppl/cmd/ad.md new file mode 100644 index 00000000000..6d18396506b --- /dev/null +++ b/docs/user/ppl/cmd/ad.md @@ -0,0 +1,124 @@ +# ad (deprecated by ml command) + +## Description + +The `ad` command applies Random Cut Forest (RCF) algorithm in the ml-commons plugin on the search result returned by a PPL command. Based on the input, the command uses two types of RCF algorithms: fixed-in-time RCF for processing time-series data, batch RCF for processing non-time-series data. +## Syntax + +## Fixed In Time RCF For Time-series Data + +ad [number_of_trees] [shingle_size] [sample_size] [output_after] [time_decay] [anomaly_rate] \ [date_format] [time_zone] [category_field] +* number_of_trees: optional. Number of trees in the forest. **Default:** 30. +* shingle_size: optional. A shingle is a consecutive sequence of the most recent records. **Default:** 8. +* sample_size: optional. The sample size used by stream samplers in this forest. **Default:** 256. +* output_after: optional. The number of points required by stream samplers before results are returned. 
**Default:** 32. +* time_decay: optional. The decay factor used by stream samplers in this forest. **Default:** 0.0001. +* anomaly_rate: optional. The anomaly rate. **Default:** 0.005. +* time_field: mandatory. Specifies the time field for RCF to use as time-series data. +* date_format: optional. Used for formatting time_field. **Default:** "yyyy-MM-dd HH:mm:ss". +* time_zone: optional. Used for setting time zone for time_field. **Default:** "UTC". +* category_field: optional. Specifies the category field used to group inputs. Each category will be independently predicted. + +## Batch RCF For Non-time-series Data + +ad [number_of_trees] [sample_size] [output_after] [training_data_size] [anomaly_score_threshold] [category_field] +* number_of_trees: optional. Number of trees in the forest. **Default:** 30. +* sample_size: optional. Number of random samples given to each tree from the training data set. **Default:** 256. +* output_after: optional. The number of points required by stream samplers before results are returned. **Default:** 32. +* training_data_size: optional. **Default:** size of your training data set. +* anomaly_score_threshold: optional. The threshold of anomaly score. **Default:** 1.0. +* category_field: optional. Specifies the category field used to group inputs. Each category will be independently predicted. + +## Example 1: Detecting events in New York City from taxi ridership data with time-series data + +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data. + +```ppl ignore +source=nyc_taxi +| fields value, timestamp +| AD time_field='timestamp' +| where value=10844.0 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+---------------------+-------+---------------+ +| value | timestamp | score | anomaly_grade | +|---------+---------------------+-------+---------------| +| 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | ++---------+---------------------+-------+---------------+ +``` + +## Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category + +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. + +```ppl ignore +source=nyc_taxi +| fields category, value, timestamp +| AD time_field='timestamp' category_field='category' +| where value=10844.0 or value=6526.0 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------+---------+---------------------+-------+---------------+ +| category | value | timestamp | score | anomaly_grade | +|----------+---------+---------------------+-------+---------------| +| night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | +| day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | ++----------+---------+---------------------+-------+---------------+ +``` + +## Example 3: Detecting events in New York City from taxi ridership data with non-time-series data + +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. 
+ +```ppl ignore +source=nyc_taxi +| fields value +| AD +| where value=10844.0 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+-------+-----------+ +| value | score | anomalous | +|---------+-------+-----------| +| 10844.0 | 0.0 | False | ++---------+-------+-----------+ +``` + +## Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category + +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. + +```ppl ignore +source=nyc_taxi +| fields category, value +| AD category_field='category' +| where value=10844.0 or value=6526.0 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------+---------+-------+-----------+ +| category | value | score | anomalous | +|----------+---------+-------+-----------| +| night | 10844.0 | 0.0 | False | +| day | 6526.0 | 0.0 | False | ++----------+---------+-------+-----------+ +``` + +## Limitations + +The `ad` command can only work with `plugins.calcite.enabled=false`. \ No newline at end of file diff --git a/docs/user/ppl/cmd/ad.rst b/docs/user/ppl/cmd/ad.rst deleted file mode 100644 index 26502dea682..00000000000 --- a/docs/user/ppl/cmd/ad.rst +++ /dev/null @@ -1,112 +0,0 @@ -============================= -ad (deprecated by ml command) -============================= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``ad`` command applies Random Cut Forest (RCF) algorithm in the ml-commons plugin on the search result returned by a PPL command. Based on the input, the command uses two types of RCF algorithms: fixed in time RCF for processing time-series data, batch RCF for processing non-time-series data. - - -Syntax -====== - -Fixed In Time RCF For Time-series Data --------------------------------------- -ad [number_of_trees] [shingle_size] [sample_size] [output_after] [time_decay] [anomaly_rate] [date_format] [time_zone] [category_field] - -* number_of_trees: optional. Number of trees in the forest. **Default:** 30. -* shingle_size: optional. A shingle is a consecutive sequence of the most recent records. **Default:** 8. -* sample_size: optional. The sample size used by stream samplers in this forest. **Default:** 256. -* output_after: optional. The number of points required by stream samplers before results are returned. **Default:** 32. -* time_decay: optional. The decay factor used by stream samplers in this forest. **Default:** 0.0001. -* anomaly_rate: optional. The anomaly rate. **Default:** 0.005. -* time_field: mandatory. Specifies the time field for RCF to use as time-series data. -* date_format: optional. Used for formatting time_field. **Default:** "yyyy-MM-dd HH:mm:ss". -* time_zone: optional. Used for setting time zone for time_field. **Default:** "UTC". -* category_field: optional. Specifies the category field used to group inputs. Each category will be independently predicted. - -Batch RCF For Non-time-series Data ----------------------------------- -ad [number_of_trees] [sample_size] [output_after] [training_data_size] [anomaly_score_threshold] [category_field] - -* number_of_trees: optional. Number of trees in the forest. **Default:** 30. -* sample_size: optional. Number of random samples given to each tree from the training data set. **Default:** 256. -* output_after: optional. The number of points required by stream samplers before results are returned. **Default:** 32. 
-* training_data_size: optional. **Default:** size of your training data set. -* anomaly_score_threshold: optional. The threshold of anomaly score. **Default:** 1.0. -* category_field: optional. Specifies the category field used to group inputs. Each category will be independently predicted. - -Example 1: Detecting events in New York City from taxi ridership data with time-series data -=========================================================================================== - -This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data. - -PPL query:: - - > source=nyc_taxi | fields value, timestamp | AD time_field='timestamp' | where value=10844.0 - fetched rows / total rows = 1/1 - +---------+---------------------+-------+---------------+ - | value | timestamp | score | anomaly_grade | - |---------+---------------------+-------+---------------| - | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - +---------+---------------------+-------+---------------+ - -Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category -============================================================================================================================ - -This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. - -PPL query:: - - > source=nyc_taxi | fields category, value, timestamp | AD time_field='timestamp' category_field='category' | where value=10844.0 or value=6526.0 - fetched rows / total rows = 2/2 - +----------+---------+---------------------+-------+---------------+ - | category | value | timestamp | score | anomaly_grade | - |----------+---------+---------------------+-------+---------------| - | night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - | day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | - +----------+---------+---------------------+-------+---------------+ - - -Example 3: Detecting events in New York City from taxi ridership data with non-time-series data -=============================================================================================== - -This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. - -PPL query:: - - > source=nyc_taxi | fields value | AD | where value=10844.0 - fetched rows / total rows = 1/1 - +---------+-------+-----------+ - | value | score | anomalous | - |---------+-------+-----------| - | 10844.0 | 0.0 | False | - +---------+-------+-----------+ - -Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category -================================================================================================================================ - -This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. - -PPL query:: - - > source=nyc_taxi | fields category, value | AD category_field='category' | where value=10844.0 or value=6526.0 - fetched rows / total rows = 2/2 - +----------+---------+-------+-----------+ - | category | value | score | anomalous | - |----------+---------+-------+-----------| - | night | 10844.0 | 0.0 | False | - | day | 6526.0 | 0.0 | False | - +----------+---------+-------+-----------+ - - -Limitations -=========== -The ``ad`` command can only work with ``plugins.calcite.enabled=false``. 
diff --git a/docs/user/ppl/cmd/append.md b/docs/user/ppl/cmd/append.md
new file mode 100644
index 00000000000..6c765286c69
--- /dev/null
+++ b/docs/user/ppl/cmd/append.md
@@ -0,0 +1,63 @@
+# append
+
+## Description
+
+The `append` command appends the result of a sub-search and attaches it as additional rows to the bottom of the input search results (the main search).
+The command aligns columns with the same field names and types. For different column fields between the main search and sub-search, NULL values are filled in the respective rows.
+## Syntax
+
+append \<sub-search\>
+* sub-search: mandatory. Executes PPL commands as a secondary search.
+
+## Limitations
+
+* **Schema Compatibility**: When fields with the same name exist between the main search and sub-search but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type, or use different field names (e.g., by renaming with `eval` or using `fields` to select non-conflicting columns).
+
+## Example 1: Append rows from a count aggregation to existing search result
+
+This example appends rows from "count by gender" to "sum by gender, state".
+
+```ppl
+source=accounts | stats sum(age) by gender, state | sort -`sum(age)` | head 5 | append [ source=accounts | stats count(age) by gender ]
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 6/6
++----------+--------+-------+------------+
+| sum(age) | gender | state | count(age) |
+|----------+--------+-------+------------|
+| 36       | M      | TN    | null       |
+| 33       | M      | MD    | null       |
+| 32       | M      | IL    | null       |
+| 28       | F      | VA    | null       |
+| null     | F      | null  | 1          |
+| null     | M      | null  | 3          |
++----------+--------+-------+------------+
+```
+
+## Example 2: Append rows with merged column names
+
+This example appends rows from "sum by gender" to "sum by gender, state" with merged column of same field name and type.
+
+```ppl
+source=accounts | stats sum(age) as sum by gender, state | sort -sum | head 5 | append [ source=accounts | stats sum(age) as sum by gender ]
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 6/6
++-----+--------+-------+
+| sum | gender | state |
+|-----+--------+-------|
+| 36  | M      | TN    |
+| 33  | M      | MD    |
+| 32  | M      | IL    |
+| 28  | F      | VA    |
+| 28  | F      | null  |
+| 101 | M      | null  |
++-----+--------+-------+
+```
+ 
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/append.rst b/docs/user/ppl/cmd/append.rst
deleted file mode 100644
index 6afdda6e439..00000000000
--- a/docs/user/ppl/cmd/append.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-======
-append
-======
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-===========
-| The ``append`` command appends the result of a sub-search and attaches it as additional rows to the bottom of the input search results (The main search).
-| The command aligns columns with the same field names and types. For different column fields between the main search and sub-search, NULL values are filled in the respective rows.
-
-Syntax
-======
-append <sub-search>
-
-* sub-search: mandatory. Executes PPL commands as a secondary search.
-
-Limitations
-===========
-
-* **Schema Compatibility**: When fields with the same name exist between the main search and sub-search but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type, or use different field names (e.g., by renaming with ``eval`` or using ``fields`` to select non-conflicting columns).
-
-Example 1: Append rows from a count aggregation to existing search result
-=========================================================================
-
-This example appends rows from "count by gender" to "sum by gender, state".
-
-PPL query::
-
-    os> source=accounts | stats sum(age) by gender, state | sort -`sum(age)` | head 5 | append [ source=accounts | stats count(age) by gender ];
-    fetched rows / total rows = 6/6
-    +----------+--------+-------+------------+
-    | sum(age) | gender | state | count(age) |
-    |----------+--------+-------+------------|
-    | 36       | M      | TN    | null       |
-    | 33       | M      | MD    | null       |
-    | 32       | M      | IL    | null       |
-    | 28       | F      | VA    | null       |
-    | null     | F      | null  | 1          |
-    | null     | M      | null  | 3          |
-    +----------+--------+-------+------------+
-
-Example 2: Append rows with merged column names
-===============================================
-
-This example appends rows from "sum by gender" to "sum by gender, state" with merged column of same field name and type.
-
-PPL query::
-
-    os> source=accounts | stats sum(age) as sum by gender, state | sort -sum | head 5 | append [ source=accounts | stats sum(age) as sum by gender ];
-    fetched rows / total rows = 6/6
-    +-----+--------+-------+
-    | sum | gender | state |
-    |-----+--------+-------|
-    | 36  | M      | TN    |
-    | 33  | M      | MD    |
-    | 32  | M      | IL    |
-    | 28  | F      | VA    |
-    | 28  | F      | null  |
-    | 101 | M      | null  |
-    +-----+--------+-------+
diff --git a/docs/user/ppl/cmd/appendcol.md b/docs/user/ppl/cmd/appendcol.md
new file mode 100644
index 00000000000..fb879c1b6f6
--- /dev/null
+++ b/docs/user/ppl/cmd/appendcol.md
@@ -0,0 +1,126 @@
+# appendcol
+
+## Description
+
+The `appendcol` command appends the result of a sub-search and attaches it alongside the input search results (the main search).
+## Syntax
+
+appendcol [override=\<override\>] \<sub-search\>
+* override=\<override\>: optional. Boolean field that specifies whether results from the main search should be overwritten in the case of a column name conflict. **Default:** false.
+* sub-search: mandatory. Executes PPL commands as a secondary search. The sub-search uses the same data specified in the source clause of the main search results as its input.
+
+## Example 1: Append a count aggregation to existing search result
+
+This example appends "count by gender" to "sum by gender, state".
+
+```ppl
+source=accounts
+| stats sum(age) by gender, state
+| appendcol [ stats count(age) by gender ]
+| head 10
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 10/10
++--------+-------+----------+------------+
+| gender | state | sum(age) | count(age) |
+|--------+-------+----------+------------|
+| F      | AK    | 317      | 493        |
+| F      | AL    | 397      | 507        |
+| F      | AR    | 229      | NULL       |
+| F      | AZ    | 238      | NULL       |
+| F      | CA    | 282      | NULL       |
+| F      | CO    | 217      | NULL       |
+| F      | CT    | 147      | NULL       |
+| F      | DC    | 358      | NULL       |
+| F      | DE    | 101      | NULL       |
+| F      | FL    | 310      | NULL       |
++--------+-------+----------+------------+
+```
+
+## Example 2: Append a count aggregation to existing search result with override option
+
+This example appends "count by gender" to "sum by gender, state" with override option.
+ +```ppl +source=accounts +| stats sum(age) by gender, state +| appendcol override=true [ stats count(age) by gender ] +| head 10 +``` + +Expected output: + +```text +fetched rows / total rows = 10/10 ++--------+-------+----------+------------+ +| gender | state | sum(age) | count(age) | +|--------+-------+----------+------------| +| F | AK | 317 | 493 | +| M | AL | 397 | 507 | +| F | AR | 229 | NULL | +| F | AZ | 238 | NULL | +| F | CA | 282 | NULL | +| F | CO | 217 | NULL | +| F | CT | 147 | NULL | +| F | DC | 358 | NULL | +| F | DE | 101 | NULL | +| F | FL | 310 | NULL | ++--------+-------+----------+------------+ +``` + +## Example 3: Append multiple sub-search results + +This example shows how to chain multiple appendcol commands to add columns from different sub-searches. + +```ppl +source=employees +| fields name, dept, age +| appendcol [ stats avg(age) as avg_age ] +| appendcol [ stats max(age) as max_age ] +``` + +Expected output: + +```text +fetched rows / total rows = 9/9 ++------+-------------+-----+------------------+---------+ +| name | dept | age | avg_age | max_age | +|------+-------------+-----+------------------+---------| +| Lisa | Sales | 35 | 31.2222222222222 | 38 | +| Fred | Engineering | 28 | NULL | NULL | +| Paul | Engineering | 23 | NULL | NULL | +| Evan | Sales | 38 | NULL | NULL | +| Chloe| Engineering | 25 | NULL | NULL | +| Tom | Engineering | 33 | NULL | NULL | +| Alex | Sales | 33 | NULL | NULL | +| Jane | Marketing | 28 | NULL | NULL | +| Jeff | Marketing | 38 | NULL | NULL | ++------+-------------+-----+------------------+---------+ +``` + +## Example 4: Override case of column name conflict + +This example demonstrates the override option when column names conflict between main search and sub-search. + +```ppl +source=employees +| stats avg(age) as agg by dept +| appendcol override=true [ stats max(age) as agg by dept ] +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-----+-------------+ +| agg | dept | +|-----+-------------| +| 38 | Sales | +| 38 | Engineering | +| 38 | Marketing | ++-----+-------------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/cmd/appendcol.rst b/docs/user/ppl/cmd/appendcol.rst deleted file mode 100644 index a9cb714256b..00000000000 --- a/docs/user/ppl/cmd/appendcol.rst +++ /dev/null @@ -1,110 +0,0 @@ -========= -appendcol -========= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -============ -The ``appendcol`` command appends the result of a sub-search and attaches it alongside with the input search results (The main search). - -Syntax -====== -appendcol [override=] - -* override=: optional. Boolean field to specify should result from main-result be overwritten in the case of column name conflict. **Default:** false. -* sub-search: mandatory. Executes PPL commands as a secondary search. The sub-search uses the same data specified in the source clause of the main search results as its input. - -Example 1: Append a count aggregation to existing search result -=============================================================== - -This example appends "count by gender" to "sum by gender, state". 
- -PPL query:: - - PPL> source=accounts | stats sum(age) by gender, state | appendcol [ stats count(age) by gender ] | head 10; - fetched rows / total rows = 10/10 - +--------+-------+----------+------------+ - | gender | state | sum(age) | count(age) | - |--------+-------+----------+------------| - | F | AK | 317 | 493 | - | F | AL | 397 | 507 | - | F | AR | 229 | NULL | - | F | AZ | 238 | NULL | - | F | CA | 282 | NULL | - | F | CO | 217 | NULL | - | F | CT | 147 | NULL | - | F | DC | 358 | NULL | - | F | DE | 101 | NULL | - | F | FL | 310 | NULL | - +--------+-------+----------+------------+ - -Example 2: Append a count aggregation to existing search result with override option -==================================================================================== - -This example appends "count by gender" to "sum by gender, state" with override option. - -PPL query:: - - PPL> source=accounts | stats sum(age) by gender, state | appendcol override=true [ stats count(age) by gender ] | head 10; - fetched rows / total rows = 10/10 - +--------+-------+----------+------------+ - | gender | state | sum(age) | count(age) | - |--------+-------+----------+------------| - | F | AK | 317 | 493 | - | M | AL | 397 | 507 | - | F | AR | 229 | NULL | - | F | AZ | 238 | NULL | - | F | CA | 282 | NULL | - | F | CO | 217 | NULL | - | F | CT | 147 | NULL | - | F | DC | 358 | NULL | - | F | DE | 101 | NULL | - | F | FL | 310 | NULL | - +--------+-------+----------+------------+ - -Example 3: Append multiple sub-search results -============================================= - -This example shows how to chain multiple appendcol commands to add columns from different sub-searches. - -PPL query:: - - PPL> source=employees | fields name, dept, age | appendcol [ stats avg(age) as avg_age ] | appendcol [ stats max(age) as max_age ]; - fetched rows / total rows = 9/9 - +------+-------------+-----+------------------+---------+ - | name | dept | age | avg_age | max_age | - |------+-------------+-----+------------------+---------| - | Lisa | Sales | 35 | 31.2222222222222 | 38 | - | Fred | Engineering | 28 | NULL | NULL | - | Paul | Engineering | 23 | NULL | NULL | - | Evan | Sales | 38 | NULL | NULL | - | Chloe| Engineering | 25 | NULL | NULL | - | Tom | Engineering | 33 | NULL | NULL | - | Alex | Sales | 33 | NULL | NULL | - | Jane | Marketing | 28 | NULL | NULL | - | Jeff | Marketing | 38 | NULL | NULL | - +------+-------------+-----+------------------+---------+ - -Example 4: Override case of column name conflict -================================================ - -This example demonstrates the override option when column names conflict between main search and sub-search. - -PPL query:: - - PPL> source=employees | stats avg(age) as agg by dept | appendcol override=true [ stats max(age) as agg by dept ]; - fetched rows / total rows = 3/3 - +-----+-------------+ - | agg | dept | - |-----+-------------| - | 38 | Sales | - | 38 | Engineering | - | 38 | Marketing | - +-----+-------------+ - diff --git a/docs/user/ppl/cmd/appendpipe.md b/docs/user/ppl/cmd/appendpipe.md new file mode 100644 index 00000000000..f2dc71a2abc --- /dev/null +++ b/docs/user/ppl/cmd/appendpipe.md @@ -0,0 +1,70 @@ +# appendpipe + +## Description + +The `appendpipe` command appends the result of the subpipeline to the search results. Unlike a subsearch, the subpipeline is not run first.The subpipeline is run when the search reaches the appendpipe command. +The command aligns columns with the same field names and types. 
For different column fields between the main search and sub-search, NULL values are filled in the respective rows.
+## Syntax
+
+appendpipe [\<subpipeline\>]
+* subpipeline: mandatory. A list of commands that are applied to the search results from the commands that occur in the search before the `appendpipe` command.
+
+## Example 1: Append rows from a total count to existing search result
+
+This example appends rows from "total by gender" to "sum by gender, state" with merged column of same field name and type.
+
+```ppl
+source=accounts
+| stats sum(age) as part by gender, state
+| sort -part
+| head 5
+| appendpipe [ stats sum(part) as total by gender ]
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 6/6
++------+--------+-------+-------+
+| part | gender | state | total |
+|------+--------+-------+-------|
+| 36   | M      | TN    | null  |
+| 33   | M      | MD    | null  |
+| 32   | M      | IL    | null  |
+| 28   | F      | VA    | null  |
+| null | F      | null  | 28    |
+| null | M      | null  | 101   |
++------+--------+-------+-------+
+```
+
+## Example 2: Append rows with merged column names
+
+This example appends rows from "count by gender" to "sum by gender, state".
+
+```ppl
+source=accounts
+| stats sum(age) as total by gender, state
+| sort -total
+| head 5
+| appendpipe [ stats sum(total) as total by gender ]
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 6/6
++----------+--------+-------+
+| total    | gender | state |
+|----------+--------+-------|
+| 36       | M      | TN    |
+| 33       | M      | MD    |
+| 32       | M      | IL    |
+| 28       | F      | VA    |
+| 28       | F      | null  |
+| 101      | M      | null  |
++----------+--------+-------+
```
+
+## Limitations
+
+* **Schema Compatibility**: Same as command `append`, when fields with the same name exist between the main search and sub-search but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type, or use different field names (e.g., by renaming with `eval` or using `fields` to select non-conflicting columns).
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/appendpipe.rst b/docs/user/ppl/cmd/appendpipe.rst
deleted file mode 100644
index c309517724a..00000000000
--- a/docs/user/ppl/cmd/appendpipe.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-=========
-appendpipe
-=========
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-============
-| The ``appendpipe`` command appends the result of the subpipeline to the search results. Unlike a subsearch, the subpipeline is not run first.The subpipeline is run when the search reaches the appendpipe command.
-The command aligns columns with the same field names and types. For different column fields between the main search and sub-search, NULL values are filled in the respective rows.
-
-Syntax
-============
-appendpipe [<subpipeline>]
-
-* subpipeline: mandatory. A list of commands that are applied to the search results from the commands that occur in the search before the ``appendpipe`` command.
-
-Example 1: Append rows from a total count to existing search result
-====================================================================================
-
-This example appends rows from "total by gender" to "sum by gender, state" with merged column of same field name and type.
-
-PPL query::
-
-    os> source=accounts | stats sum(age) as part by gender, state | sort -part | head 5 | appendpipe [ stats sum(part) as total by gender ];
-    fetched rows / total rows = 6/6
-    +------+--------+-------+-------+
-    | part | gender | state | total |
-    |------+--------+-------+-------|
-    | 36   | M      | TN    | null  |
-    | 33   | M      | MD    | null  |
-    | 32   | M      | IL    | null  |
-    | 28   | F      | VA    | null  |
-    | null | F      | null  | 28    |
-    | null | M      | null  | 101   |
-    +------+--------+-------+-------+
-
-
-
-Example 2: Append rows with merged column names
-===============================================================
-
-This example appends rows from "count by gender" to "sum by gender, state".
-
-PPL query::
-
-    os> source=accounts | stats sum(age) as total by gender, state | sort -total | head 5 | appendpipe [ stats sum(total) as total by gender ];
-    fetched rows / total rows = 6/6
-    +----------+--------+-------+
-    | total    | gender | state |
-    |----------+--------+-------|
-    | 36       | M      | TN    |
-    | 33       | M      | MD    |
-    | 32       | M      | IL    |
-    | 28       | F      | VA    |
-    | 28       | F      | null  |
-    | 101      | M      | null  |
-    +----------+--------+-------+
-
-Limitations
-===========
-
-* **Schema Compatibility**: Same as command ``append``, when fields with the same name exist between the main search and sub-search but have incompatible types, the query will fail with an error. To avoid type conflicts, ensure that fields with the same name have the same data type, or use different field names (e.g., by renaming with ``eval`` or using ``fields`` to select non-conflicting columns).
diff --git a/docs/user/ppl/cmd/bin.md b/docs/user/ppl/cmd/bin.md
new file mode 100644
index 00000000000..4fc81d0f0fd
--- /dev/null
+++ b/docs/user/ppl/cmd/bin.md
@@ -0,0 +1,469 @@
+# bin
+
+## Description
+
+The `bin` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket.
+## Syntax
+
+bin \<field\> [span=\<interval\>] [minspan=\<interval\>] [bins=\<count\>] [aligntime=(earliest \| latest \| \<time-specifier\>)] [start=\<value\>] [end=\<value\>]
+* field: mandatory. The field to bin. Accepts numeric or time-based fields.
+* span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters.
+  * Supports numeric (e.g., `1000`), logarithmic (e.g., `log10`, `2log10`), and time intervals
+  * Available time units:
+    * microsecond (us)
+    * millisecond (ms)
+    * centisecond (cs)
+    * decisecond (ds)
+    * second (s, sec, secs, second, seconds)
+    * minute (m, min, mins, minute, minutes)
+    * hour (h, hr, hrs, hour, hours)
+    * day (d, day, days)
+    * month (mon, month, months)
+* minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters.
+* bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. The bins parameter must be between 2 and 50000 (inclusive).
+* aligntime: optional. Align the bin times for time-based fields. Valid only for time-based discretization. Options:
+  * earliest: Align bins to the earliest timestamp in the data
+  * latest: Align bins to the latest timestamp in the data
+  * \<time-specifier\>: Align bins to a specific epoch time value or time modifier expression
+* start: optional. The starting value for binning range. **Default:** minimum field value.
+* end: optional. The ending value for binning range. **Default:** maximum field value.
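+
+To make the syntax above concrete, here is a minimal sketch (query only, output omitted) that combines an explicit span with a start/end range; it reuses the `accounts` index and `balance` field from the examples below:
+
+```ppl
+source=accounts
+| bin balance span=10000 start=0 end=50000
+| fields balance
+```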
+ +**Parameter Behavior** +When multiple parameters are specified, priority order is: span > minspan > bins > start/end > default. +**Special Behaviors:** +* Logarithmic span (`log10`, `2log10`, etc.) creates logarithmic bin boundaries instead of linear +* Daily/monthly spans automatically align to calendar boundaries and return date strings (YYYY-MM-DD) instead of timestamps +* aligntime parameter only applies to time spans excluding days/months +* start/end parameters expand the range (never shrink) and affect bin width calculation + +## Example 1: Basic numeric span + +```ppl +source=accounts +| bin age span=10 +| fields age, account_number +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-------+----------------+ +| age | account_number | +|-------+----------------| +| 30-40 | 1 | +| 30-40 | 6 | +| 20-30 | 13 | ++-------+----------------+ +``` + +## Example 2: Large numeric span + +```ppl +source=accounts +| bin balance span=25000 +| fields balance +| head 2 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++-------------+ +| balance | +|-------------| +| 25000-50000 | +| 0-25000 | ++-------------+ +``` + +## Example 3: Logarithmic span (log10) + +```ppl +source=accounts +| bin balance span=log10 +| fields balance +| head 2 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++------------------+ +| balance | +|------------------| +| 10000.0-100000.0 | +| 1000.0-10000.0 | ++------------------+ +``` + +## Example 4: Logarithmic span with coefficient + +```ppl +source=accounts +| bin balance span=2log10 +| fields balance +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++------------------+ +| balance | +|------------------| +| 20000.0-200000.0 | +| 2000.0-20000.0 | +| 20000.0-200000.0 | ++------------------+ +``` + +## Example 5: Basic bins parameter + +```ppl +source=time_test +| bin value bins=5 +| fields value +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++------------+ +| value | +|------------| +| 8000-9000 | +| 7000-8000 | +| 9000-10000 | ++------------+ +``` + +## Example 6: Low bin count + +```ppl +source=accounts +| bin age bins=2 +| fields age +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------+ +| age | +|-------| +| 30-40 | ++-------+ +``` + +## Example 7: High bin count + +```ppl +source=accounts +| bin age bins=21 +| fields age, account_number +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-------+----------------+ +| age | account_number | +|-------+----------------| +| 32-33 | 1 | +| 36-37 | 6 | +| 28-29 | 13 | ++-------+----------------+ +``` + +## Example 8: Basic minspan + +```ppl +source=accounts +| bin age minspan=5 +| fields age, account_number +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-------+----------------+ +| age | account_number | +|-------+----------------| +| 30-40 | 1 | +| 30-40 | 6 | +| 20-30 | 13 | ++-------+----------------+ +``` + +## Example 9: Large minspan + +```ppl +source=accounts +| bin age minspan=101 +| fields age +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| age | +|--------| +| 0-1000 | ++--------+ +``` + +## Example 10: Start and end range + +```ppl +source=accounts +| bin age start=0 end=101 +| fields age +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------+ +| age | +|-------| +| 0-100 | ++-------+ +``` + +## Example 
11: Large end range + +```ppl +source=accounts +| bin balance start=0 end=100001 +| fields balance +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------+ +| balance | +|----------| +| 0-100000 | ++----------+ +``` + +## Example 12: Span with start/end + +```ppl +source=accounts +| bin age span=1 start=25 end=35 +| fields age +| head 6 +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-------+ +| age | +|-------| +| 32-33 | +| 36-37 | +| 28-29 | +| 33-34 | ++-------+ +``` + +## Example 13: Hour span + +```ppl +source=time_test +| bin @timestamp span=1h +| fields @timestamp, value +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++---------------------+-------+ +| @timestamp | value | +|---------------------+-------| +| 2025-07-28 00:00:00 | 8945 | +| 2025-07-28 01:00:00 | 7623 | +| 2025-07-28 02:00:00 | 9187 | ++---------------------+-------+ +``` + +## Example 14: Minute span + +```ppl +source=time_test +| bin @timestamp span=45minute +| fields @timestamp, value +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++---------------------+-------+ +| @timestamp | value | +|---------------------+-------| +| 2025-07-28 00:00:00 | 8945 | +| 2025-07-28 01:30:00 | 7623 | +| 2025-07-28 02:15:00 | 9187 | ++---------------------+-------+ +``` + +## Example 15: Second span + +```ppl +source=time_test +| bin @timestamp span=30seconds +| fields @timestamp, value +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++---------------------+-------+ +| @timestamp | value | +|---------------------+-------| +| 2025-07-28 00:15:30 | 8945 | +| 2025-07-28 01:42:00 | 7623 | +| 2025-07-28 02:28:30 | 9187 | ++---------------------+-------+ +``` + +## Example 16: Daily span + +```ppl +source=time_test +| bin @timestamp span=7day +| fields @timestamp, value +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++---------------------+-------+ +| @timestamp | value | +|---------------------+-------| +| 2025-07-24 00:00:00 | 8945 | +| 2025-07-24 00:00:00 | 7623 | +| 2025-07-24 00:00:00 | 9187 | ++---------------------+-------+ +``` + +## Example 17: Aligntime with time modifier + +```ppl +source=time_test +| bin @timestamp span=2h aligntime='@d+3h' +| fields @timestamp, value +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++---------------------+-------+ +| @timestamp | value | +|---------------------+-------| +| 2025-07-27 23:00:00 | 8945 | +| 2025-07-28 01:00:00 | 7623 | +| 2025-07-28 01:00:00 | 9187 | ++---------------------+-------+ +``` + +## Example 18: Aligntime with epoch timestamp + +```ppl +source=time_test +| bin @timestamp span=2h aligntime=1500000000 +| fields @timestamp, value +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++---------------------+-------+ +| @timestamp | value | +|---------------------+-------| +| 2025-07-27 22:40:00 | 8945 | +| 2025-07-28 00:40:00 | 7623 | +| 2025-07-28 00:40:00 | 9187 | ++---------------------+-------+ +``` + +## Example 19: Default behavior (no parameters) + +```ppl +source=accounts +| bin age +| fields age, account_number +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-----------+----------------+ +| age | account_number | +|-----------+----------------| +| 32.0-33.0 | 1 | +| 36.0-37.0 | 6 | +| 28.0-29.0 | 13 | ++-----------+----------------+ +``` + +## Example 20: Binning with string fields + +```ppl 
+source=accounts +| eval age_str = CAST(age AS STRING) +| bin age_str bins=3 +| stats count() by age_str +| sort age_str +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++---------+---------+ +| count() | age_str | +|---------+---------| +| 1 | 20-30 | +| 3 | 30-40 | ++---------+---------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/cmd/bin.rst b/docs/user/ppl/cmd/bin.rst deleted file mode 100644 index f42fb1da84f..00000000000 --- a/docs/user/ppl/cmd/bin.rst +++ /dev/null @@ -1,348 +0,0 @@ -=== -bin -=== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -============ -| The ``bin`` command groups numeric values into buckets of equal intervals, making it useful for creating histograms and analyzing data distribution. It takes a numeric or time-based field and generates a new field with values that represent the lower bound of each bucket. - -Syntax -====== -bin [span=] [minspan=] [bins=] [aligntime=(earliest | latest | )] [start=] [end=] - -* field: mandatory. The field to bin. Accepts numeric or time-based fields. -* span: optional. The interval size for each bin. Cannot be used with bins or minspan parameters. - - * Supports numeric (e.g., ``1000``), logarithmic (e.g., ``log10``, ``2log10``), and time intervals - * Available time units: - - * microsecond (us) - * millisecond (ms) - * centisecond (cs) - * decisecond (ds) - * second (s, sec, secs, second, seconds) - * minute (m, min, mins, minute, minutes) - * hour (h, hr, hrs, hour, hours) - * day (d, day, days) - * month (mon, month, months) - -* minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters. -* bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. The bins parameter must be between 2 and 50000 (inclusive). -* aligntime: optional. Align the bin times for time-based fields. Valid only for time-based discretization. Options: - - * earliest: Align bins to the earliest timestamp in the data - * latest: Align bins to the latest timestamp in the data - * : Align bins to a specific epoch time value or time modifier expression - -* start: optional. The starting value for binning range. **Default:** minimum field value. -* end: optional. The ending value for binning range. **Default:** maximum field value. - -**Parameter Behavior** - -When multiple parameters are specified, priority order is: span > minspan > bins > start/end > default. - -**Special Behaviors:** -* Logarithmic span (``log10``, ``2log10``, etc.) 
creates logarithmic bin boundaries instead of linear -* Daily/monthly spans automatically align to calendar boundaries and return date strings (YYYY-MM-DD) instead of timestamps -* aligntime parameter only applies to time spans excluding days/months -* start/end parameters expand the range (never shrink) and affect bin width calculation - -Example 1: Basic numeric span -============================= - -PPL query:: - - os> source=accounts | bin age span=10 | fields age, account_number | head 3; - fetched rows / total rows = 3/3 - +-------+----------------+ - | age | account_number | - |-------+----------------| - | 30-40 | 1 | - | 30-40 | 6 | - | 20-30 | 13 | - +-------+----------------+ - -Example 2: Large numeric span -============================== - -PPL query:: - - os> source=accounts | bin balance span=25000 | fields balance | head 2; - fetched rows / total rows = 2/2 - +-------------+ - | balance | - |-------------| - | 25000-50000 | - | 0-25000 | - +-------------+ - - -Example 3: Logarithmic span (log10) -=================================== - -PPL query:: - - os> source=accounts | bin balance span=log10 | fields balance | head 2; - fetched rows / total rows = 2/2 - +------------------+ - | balance | - |------------------| - | 10000.0-100000.0 | - | 1000.0-10000.0 | - +------------------+ - -Example 4: Logarithmic span with coefficient -============================================ - -PPL query:: - - os> source=accounts | bin balance span=2log10 | fields balance | head 3; - fetched rows / total rows = 3/3 - +------------------+ - | balance | - |------------------| - | 20000.0-200000.0 | - | 2000.0-20000.0 | - | 20000.0-200000.0 | - +------------------+ - -Example 5: Basic bins parameter -=============================== - -PPL query:: - - os> source=time_test | bin value bins=5 | fields value | head 3; - fetched rows / total rows = 3/3 - +------------+ - | value | - |------------| - | 8000-9000 | - | 7000-8000 | - | 9000-10000 | - +------------+ - -Example 6: Low bin count -======================== - -PPL query:: - - os> source=accounts | bin age bins=2 | fields age | head 1; - fetched rows / total rows = 1/1 - +-------+ - | age | - |-------| - | 30-40 | - +-------+ - -Example 7: High bin count -========================= - -PPL query:: - - os> source=accounts | bin age bins=21 | fields age, account_number | head 3; - fetched rows / total rows = 3/3 - +-------+----------------+ - | age | account_number | - |-------+----------------| - | 32-33 | 1 | - | 36-37 | 6 | - | 28-29 | 13 | - +-------+----------------+ - -Example 8: Basic minspan -======================== - -PPL query:: - - os> source=accounts | bin age minspan=5 | fields age, account_number | head 3; - fetched rows / total rows = 3/3 - +-------+----------------+ - | age | account_number | - |-------+----------------| - | 30-40 | 1 | - | 30-40 | 6 | - | 20-30 | 13 | - +-------+----------------+ - -Example 9: Large minspan -======================== - -PPL query:: - - os> source=accounts | bin age minspan=101 | fields age | head 1; - fetched rows / total rows = 1/1 - +--------+ - | age | - |--------| - | 0-1000 | - +--------+ - -Example 10: Start and end range -=============================== - -PPL query:: - - os> source=accounts | bin age start=0 end=101 | fields age | head 1; - fetched rows / total rows = 1/1 - +-------+ - | age | - |-------| - | 0-100 | - +-------+ - -Example 11: Large end range -=========================== - -PPL query:: - - os> source=accounts | bin balance start=0 end=100001 | fields balance | head 1; - 
fetched rows / total rows = 1/1 - +----------+ - | balance | - |----------| - | 0-100000 | - +----------+ - -Example 12: Span with start/end -=============================== - -PPL query:: - - os> source=accounts | bin age span=1 start=25 end=35 | fields age | head 6; - fetched rows / total rows = 4/4 - +-------+ - | age | - |-------| - | 32-33 | - | 36-37 | - | 28-29 | - | 33-34 | - +-------+ - -Example 13: Hour span -===================== - -PPL query:: - - os> source=time_test | bin @timestamp span=1h | fields @timestamp, value | head 3; - fetched rows / total rows = 3/3 - +---------------------+-------+ - | @timestamp | value | - |---------------------+-------| - | 2025-07-28 00:00:00 | 8945 | - | 2025-07-28 01:00:00 | 7623 | - | 2025-07-28 02:00:00 | 9187 | - +---------------------+-------+ - -Example 14: Minute span -======================= - -PPL query:: - - os> source=time_test | bin @timestamp span=45minute | fields @timestamp, value | head 3; - fetched rows / total rows = 3/3 - +---------------------+-------+ - | @timestamp | value | - |---------------------+-------| - | 2025-07-28 00:00:00 | 8945 | - | 2025-07-28 01:30:00 | 7623 | - | 2025-07-28 02:15:00 | 9187 | - +---------------------+-------+ - -Example 15: Second span -======================= - -PPL query:: - - os> source=time_test | bin @timestamp span=30seconds | fields @timestamp, value | head 3; - fetched rows / total rows = 3/3 - +---------------------+-------+ - | @timestamp | value | - |---------------------+-------| - | 2025-07-28 00:15:30 | 8945 | - | 2025-07-28 01:42:00 | 7623 | - | 2025-07-28 02:28:30 | 9187 | - +---------------------+-------+ - -Example 16: Daily span -====================== - -PPL query:: - - os> source=time_test | bin @timestamp span=7day | fields @timestamp, value | head 3; - fetched rows / total rows = 3/3 - +---------------------+-------+ - | @timestamp | value | - |---------------------+-------| - | 2025-07-24 00:00:00 | 8945 | - | 2025-07-24 00:00:00 | 7623 | - | 2025-07-24 00:00:00 | 9187 | - +---------------------+-------+ - -Example 17: Aligntime with time modifier -======================================== - -PPL query:: - - os> source=time_test | bin @timestamp span=2h aligntime='@d+3h' | fields @timestamp, value | head 3; - fetched rows / total rows = 3/3 - +---------------------+-------+ - | @timestamp | value | - |---------------------+-------| - | 2025-07-27 23:00:00 | 8945 | - | 2025-07-28 01:00:00 | 7623 | - | 2025-07-28 01:00:00 | 9187 | - +---------------------+-------+ - -Example 18: Aligntime with epoch timestamp -========================================== - -PPL query:: - - os> source=time_test | bin @timestamp span=2h aligntime=1500000000 | fields @timestamp, value | head 3; - fetched rows / total rows = 3/3 - +---------------------+-------+ - | @timestamp | value | - |---------------------+-------| - | 2025-07-27 22:40:00 | 8945 | - | 2025-07-28 00:40:00 | 7623 | - | 2025-07-28 00:40:00 | 9187 | - +---------------------+-------+ - -Example 19: Default behavior (no parameters) -============================================ - -PPL query:: - - os> source=accounts | bin age | fields age, account_number | head 3; - fetched rows / total rows = 3/3 - +-----------+----------------+ - | age | account_number | - |-----------+----------------| - | 32.0-33.0 | 1 | - | 36.0-37.0 | 6 | - | 28.0-29.0 | 13 | - +-----------+----------------+ - - -Example 20: Binning with string fields -============================================== - -PPL query:: - - os> source=accounts | eval age_str = 
CAST(age AS STRING) | bin age_str bins=3 | stats count() by age_str | sort age_str;
-    fetched rows / total rows = 2/2
-    +---------+---------+
-    | count() | age_str |
-    |---------+---------|
-    | 1       | 20-30   |
-    | 3       | 30-40   |
-    +---------+---------+
-
diff --git a/docs/user/ppl/cmd/chart.md b/docs/user/ppl/cmd/chart.md
new file mode 100644
index 00000000000..829afdedb78
--- /dev/null
+++ b/docs/user/ppl/cmd/chart.md
@@ -0,0 +1,200 @@
+# chart
+
+## Description
+
+The `chart` command transforms search results by applying a statistical aggregation function and optionally grouping the data by one or two fields. The results are suitable for visualization as a two-dimensional chart when grouping by two fields, where unique values in the second group key can be pivoted to column names.
+## Syntax
+
+chart [limit=(top\|bottom) \<int\>] [useother=\<bool\>] [usenull=\<bool\>] [nullstr=\<string\>] [otherstr=\<string\>] \<aggregation_function\> [ by \<row_split\> \<column_split\> ] \| [over \<row_split\> ] [ by \<column_split\>]
+* limit: optional. Specifies the number of categories to display when using column split. Each unique value in the column split field represents a category. **Default:** top10.
+  * Syntax: `limit=(top|bottom)<K>` or `limit=<K>` (defaults to top)
+  * When `limit=K` is set, the top or bottom K categories from the column split field are retained; the remaining categories are grouped into an "OTHER" category if `useother` is not set to false.
+  * Set limit to 0 to show all categories without any limit.
+  * Use `limit=topK` or `limit=bottomK` to specify whether to retain the top or bottom K column categories. The ranking is based on the sum of aggregated values for each column category. For example, `chart limit=top3 count() by region, product` keeps the 3 products with the highest total counts across all regions. If not specified, top is used by default.
+  * Only applies when column split is present (i.e. grouping by 2 fields, or using over...by... together).
+* useother: optional. Controls whether to create an "OTHER" category for categories beyond the limit. **Default:** true
+  * When set to false, only the top/bottom N categories (based on limit) are shown without an "OTHER" category.
+  * When set to true, categories beyond the limit are grouped into an "OTHER" category.
+  * Only applies when using column split and when there are more categories than the limit.
+* usenull: optional. Controls whether to group events without a column split (i.e. whose column split is null) into a separate "NULL" category. **Default:** true
+  * `usenull` only applies to column split.
+  * Row split should always be a non-null value. Documents with null values in row split will be ignored.
+  * When `usenull=false`, events with a null column split are excluded from results.
+  * When `usenull=true`, events with a null column split are grouped into a separate "NULL" category.
+* nullstr: optional. Specifies the category name for rows that do not contain the column split value. **Default:** "NULL"
+  * Only applies when `usenull` is set to true.
+* otherstr: optional. Specifies the category name for the "OTHER" category. **Default:** "OTHER"
+  * Only applies when `useother` is set to true and there are values beyond the limit.
+* aggregation_function: mandatory. The aggregation function to apply to the data.
+  * Currently, only a single aggregation function is supported.
+  * Available functions: aggregation functions supported by the stats command.
+* by: optional. 
Groups the results by either one field (row split) or two fields (row split and column split) + * `limit`, `useother`, and `usenull` apply to the column split + * Results are returned as individual rows for each combination. + * If not specified, the aggregation is performed across all documents. +* over...by...: optional. Alternative syntax for grouping by multiple fields. + * `over by ` groups the results by both fields. + * Using `over` alone on one field is equivalent to `by ` + +## Notes + +* The fields generated by column splitting are converted to strings so that they are compatible with `nullstr` and `otherstr` and can be used as column names once pivoted. +* Documents with null values in fields used by the aggregation function are excluded from aggregation. For example, in `chart avg(balance) over deptno, group`, documents where `balance` is null are excluded from the average calculation. +* The aggregation metric appears as the last column in the result. Result columns are ordered as: [row-split] [column-split] [aggregation-metrics]. + +## Example 1: Basic aggregation without grouping + +This example calculates the average balance across all accounts. + +```ppl +source=accounts +| chart avg(balance) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| avg(balance) | +|--------------| +| 20482.25 | ++--------------+ +``` + +## Example 2: Group by single field + +This example calculates the count of accounts grouped by gender. + +```ppl +source=accounts +| chart count() by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------+---------+ +| gender | count() | +|--------+---------| +| F | 1 | +| M | 3 | ++--------+---------+ +``` + +## Example 3: Using over and by for multiple field grouping + +This example shows average balance grouped by both gender and age fields. Note that the age column in the result is converted to string type. + +```ppl +source=accounts +| chart avg(balance) over gender by age +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------+-----+--------------+ +| gender | age | avg(balance) | +|--------+-----+--------------| +| F | 28 | 32838.0 | +| M | 32 | 39225.0 | +| M | 33 | 4180.0 | +| M | 36 | 5686.0 | ++--------+-----+--------------+ +``` + +## Example 4: Using basic limit functionality + +This example limits the results to show only the top 1 age group. Note that the age column in the result is converted to string type. + +```ppl +source=accounts +| chart limit=1 count() over gender by age +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++--------+-------+---------+ +| gender | age | count() | +|--------+-------+---------| +| F | OTHER | 1 | +| M | 33 | 1 | +| M | OTHER | 2 | ++--------+-------+---------+ +``` + +## Example 5: Using limit with other parameters + +This example shows using limit with useother and custom otherstr parameters. + +```ppl +source=accounts +| chart limit=top1 useother=true otherstr='minor_gender' count() over state by gender +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-------+--------------+---------+ +| state | gender | count() | +|-------+--------------+---------| +| IL | M | 1 | +| MD | M | 1 | +| TN | M | 1 | +| VA | minor_gender | 1 | ++-------+--------------+---------+ +``` + +## Example 6: Using null parameters + +This example shows using limit with usenull and custom nullstr parameters. 
+ +```ppl +source=accounts +| chart usenull=true nullstr='employer not specified' count() over firstname by employer +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+------------------------+---------+ +| firstname | employer | count() | +|-----------+------------------------+---------| +| Amber | Pyrami | 1 | +| Dale | employer not specified | 1 | +| Hattie | Netagy | 1 | +| Nanette | Quility | 1 | ++-----------+------------------------+---------+ +``` + +## Example 7: Using chart command with span + +This example demonstrates using span for grouping age ranges. + +```ppl +source=accounts +| chart max(balance) by age span=10, gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++-----+--------+--------------+ +| age | gender | max(balance) | +|-----+--------+--------------| +| 20 | F | 32838 | +| 30 | M | 39225 | ++-----+--------+--------------+ +``` + +## Limitations + +* Only a single aggregation function is supported per chart command. \ No newline at end of file diff --git a/docs/user/ppl/cmd/chart.rst b/docs/user/ppl/cmd/chart.rst deleted file mode 100644 index 4ffe3e7abef..00000000000 --- a/docs/user/ppl/cmd/chart.rst +++ /dev/null @@ -1,193 +0,0 @@ -===== -chart -===== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== - -The ``chart`` command transforms search results by applying a statistical aggregation function and optionally grouping the data by one or two fields. The results are suitable for visualization as a two-dimension chart when grouping by two fields, where unique values in the second group key can be pivoted to column names. - -Syntax -====== -chart [limit=(top|bottom) ] [useother=] [usenull=] [nullstr=] [otherstr=] [ by ] | [over ] [ by ] - -* limit: optional. Specifies the number of categories to display when using column split. Each unique value in the column split field represents a category. **Default:** top10. - - * Syntax: ``limit=(top|bottom)`` or ``limit=`` (defaults to top) - * When ``limit=K`` is set, the top or bottom K categories from the column split field are retained; the remaining categories are grouped into an "OTHER" category if ``useother`` is not set to false. - * Set limit to 0 to show all categories without any limit. - * Use ``limit=topK`` or ``limit=bottomK`` to specify whether to retain the top or bottom K column categories. The ranking is based on the sum of aggregated values for each column category. For example, ``chart limit=top3 count() by region, product`` keeps the 3 products with the highest total counts across all regions. If not specified, top is used by default. - * Only applies when column split is present (by 2 fields or over...by... coexists). - -* useother: optional. Controls whether to create an "OTHER" category for categories beyond the limit. **Default:** true - - * When set to false, only the top/bottom N categories (based on limit) are shown without an "OTHER" category. - * When set to true, categories beyond the limit are grouped into an "OTHER" category. - * Only applies when using column split and when there are more categories than the limit. - -* usenull: optional. Controls whether to group events without a column split (i.e. whose column split is null) into a separate "NULL" category. **Default:** true - - * ``usenull`` only applies to column split. - * Row split should always be non-null value. Documents with null values in row split will be ignored. 
- * When ``usenull=false``, events with a null column split are excluded from results. - * When ``usenull=true``, events with a null column split are grouped into a separate "NULL" category. - -* nullstr: optional. Specifies the category name for rows that do not contain the column split value. **Default:** "NULL" - - * Only applies when ``usenull`` is set to true. - -* otherstr: optional. Specifies the category name for the "OTHER" category. **Default:** "OTHER" - - * Only applies when ``useother`` is set to true and there are values beyond the limit. - -* aggregation_function: mandatory. The aggregation function to apply to the data. - - * Currently, only a single aggregation function is supported. - * Available functions: aggregation functions supported by the stats command. - -* by: optional. Groups the results by either one field (row split) or two fields (row split and column split) - - * ``limit``, ``useother``, and ``usenull`` apply to the column split - * Results are returned as individual rows for each combination. - * If not specified, the aggregation is performed across all documents. - -* over...by...: optional. Alternative syntax for grouping by multiple fields. - - * ``over by `` groups the results by both fields. - * Using ``over`` alone on one field is equivalent to ``by `` - -Notes -===== - -* The fields generated by column splitting are converted to strings so that they are compatible with ``nullstr`` and ``otherstr`` and can be used as column names once pivoted. -* Documents with null values in fields used by the aggregation function are excluded from aggregation. For example, in ``chart avg(balance) over deptno, group``, documents where ``balance`` is null are excluded from the average calculation. -* The aggregation metric appears as the last column in the result. Result columns are ordered as: [row-split] [column-split] [aggregation-metrics]. - -Example 1: Basic aggregation without grouping -============================================= - -This example calculates the average balance across all accounts. - -PPL query:: - - os> source=accounts | chart avg(balance) - fetched rows / total rows = 1/1 - +--------------+ - | avg(balance) | - |--------------| - | 20482.25 | - +--------------+ - -Example 2: Group by single field -================================ - -This example calculates the count of accounts grouped by gender. - -PPL query:: - - os> source=accounts | chart count() by gender - fetched rows / total rows = 2/2 - +--------+---------+ - | gender | count() | - |--------+---------| - | F | 1 | - | M | 3 | - +--------+---------+ - -Example 3: Using over and by for multiple field grouping -======================================================== - -This example shows average balance grouped by both gender and age fields. Note that the age column in the result is converted to string type. - -PPL query:: - - os> source=accounts | chart avg(balance) over gender by age - fetched rows / total rows = 4/4 - +--------+-----+--------------+ - | gender | age | avg(balance) | - |--------+-----+--------------| - | F | 28 | 32838.0 | - | M | 32 | 39225.0 | - | M | 33 | 4180.0 | - | M | 36 | 5686.0 | - +--------+-----+--------------+ - -Example 4: Using basic limit functionality -========================================== - -This example limits the results to show only the top 1 age group. Note that the age column in the result is converted to string type. 
- -PPL query:: - - os> source=accounts | chart limit=1 count() over gender by age - fetched rows / total rows = 3/3 - +--------+-------+---------+ - | gender | age | count() | - |--------+-------+---------| - | F | OTHER | 1 | - | M | 33 | 1 | - | M | OTHER | 2 | - +--------+-------+---------+ - -Example 5: Using limit with other parameters -============================================ - -This example shows using limit with useother and custom otherstr parameters. - -PPL query:: - - os> source=accounts | chart limit=top1 useother=true otherstr='minor_gender' count() over state by gender - fetched rows / total rows = 4/4 - +-------+--------------+---------+ - | state | gender | count() | - |-------+--------------+---------| - | IL | M | 1 | - | MD | M | 1 | - | TN | M | 1 | - | VA | minor_gender | 1 | - +-------+--------------+---------+ - -Example 6: Using null parameters -================================ - -This example shows using limit with usenull and custom nullstr parameters. - -PPL query:: - - os> source=accounts | chart usenull=true nullstr='employer not specified' count() over firstname by employer - fetched rows / total rows = 4/4 - +-----------+------------------------+---------+ - | firstname | employer | count() | - |-----------+------------------------+---------| - | Amber | Pyrami | 1 | - | Dale | employer not specified | 1 | - | Hattie | Netagy | 1 | - | Nanette | Quility | 1 | - +-----------+------------------------+---------+ - -Example 7: Using chart command with span -======================================== - -This example demonstrates using span for grouping age ranges. - -PPL query:: - - os> source=accounts | chart max(balance) by age span=10, gender - fetched rows / total rows = 2/2 - +-----+--------+--------------+ - | age | gender | max(balance) | - |-----+--------+--------------| - | 20 | F | 32838 | - | 30 | M | 39225 | - +-----+--------+--------------+ - -Limitations -=========== -* Only a single aggregation function is supported per chart command. diff --git a/docs/user/ppl/cmd/dedup.md b/docs/user/ppl/cmd/dedup.md new file mode 100644 index 00000000000..59dfcf63ddc --- /dev/null +++ b/docs/user/ppl/cmd/dedup.md @@ -0,0 +1,134 @@ +# dedup + +## Description + +The `dedup` command removes duplicate documents defined by specified fields from the search result. +## Syntax + +dedup [int] \<field-list\> [keepempty=\<bool\>] [consecutive=\<bool\>] +* int: optional. The `dedup` command retains multiple events for each combination when you specify \<int\>. The number for \<int\> must be greater than 0. All other duplicates are removed from the results. **Default:** 1 +* keepempty: optional. If set to true, keeps the document if any field in the field-list has a NULL value or is MISSING. **Default:** false. +* consecutive: optional. If set to true, removes only events with duplicate combinations of values that are consecutive. **Default:** false. +* field-list: mandatory. The comma-delimited field list. At least one field is required. + +## Example 1: Dedup by one field + +This example shows deduplicating documents by gender field. + +```ppl +source=accounts +| dedup gender +| fields account_number, gender +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------+--------+ +| account_number | gender | +|----------------+--------| +| 1 | M | +| 13 | F | ++----------------+--------+ +``` + +## Example 2: Keep 2 duplicate documents + +This example shows deduplicating documents by gender field while keeping 2 duplicates.
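+The `int` and `keepempty` options compose. A minimal sketch (untested, assuming the same `accounts` index) that keeps up to two events per gender and also retains documents whose `gender` field is null or missing: + +```sql ignore +source=accounts | dedup 2 gender keepempty=true +``` + +The query keeping two duplicates per gender is: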
 + +```ppl +source=accounts +| dedup 2 gender +| fields account_number, gender +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+--------+ +| account_number | gender | +|----------------+--------| +| 1 | M | +| 6 | M | +| 13 | F | ++----------------+--------+ +``` + +## Example 3: Keep or ignore the empty field by default + +This example shows deduplicating documents while keeping null values. + +```ppl +source=accounts +| dedup email keepempty=true +| fields account_number, email +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----------------------+ +| account_number | email | +|----------------+-----------------------| +| 1 | amberduke@pyrami.com | +| 6 | hattiebond@netagy.com | +| 13 | null | +| 18 | daleadams@boink.com | ++----------------+-----------------------+ +``` + +This example shows deduplicating documents while ignoring null values. + +```ppl +source=accounts +| dedup email +| fields account_number, email +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+-----------------------+ +| account_number | email | +|----------------+-----------------------| +| 1 | amberduke@pyrami.com | +| 6 | hattiebond@netagy.com | +| 18 | daleadams@boink.com | ++----------------+-----------------------+ +``` + +## Example 4: Dedup in consecutive documents + +This example shows deduplicating consecutive documents. + +```ppl +source=accounts +| dedup gender consecutive=true +| fields account_number, gender +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+--------+ +| account_number | gender | +|----------------+--------| +| 1 | M | +| 13 | F | +| 18 | M | ++----------------+--------+ +``` + +## Limitations + +The `dedup` command with `consecutive=true` can only work with `plugins.calcite.enabled=false`. \ No newline at end of file diff --git a/docs/user/ppl/cmd/dedup.rst b/docs/user/ppl/cmd/dedup.rst deleted file mode 100644 index bc3e9a48ca5..00000000000 --- a/docs/user/ppl/cmd/dedup.rst +++ /dev/null @@ -1,111 +0,0 @@ -===== -dedup -===== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -The ``dedup`` command removes duplicate documents defined by specified fields from the search result. - -Syntax -====== -dedup [int] [keepempty=] [consecutive=] - -* int: optional. The ``dedup`` command retains multiple events for each combination when you specify . The number for must be greater than 0. All other duplicates are removed from the results. **Default:** 1 -* keepempty: optional. If set to true, keep the document if the any field in the field-list has NULL value or field is MISSING. **Default:** false. -* consecutive: optional. If set to true, removes only events with duplicate combinations of values that are consecutive. **Default:** false. -* field-list: mandatory. The comma-delimited field list. At least one field is required. - -Example 1: Dedup by one field -============================= - -This example shows deduplicating documents by gender field.
- -PPL query:: - - os> source=accounts | dedup gender | fields account_number, gender | sort account_number; - fetched rows / total rows = 2/2 - +----------------+--------+ - | account_number | gender | - |----------------+--------| - | 1 | M | - | 13 | F | - +----------------+--------+ - -Example 2: Keep 2 duplicates documents -====================================== - -This example shows deduplicating documents by gender field while keeping 2 duplicates. - -PPL query:: - - os> source=accounts | dedup 2 gender | fields account_number, gender | sort account_number; - fetched rows / total rows = 3/3 - +----------------+--------+ - | account_number | gender | - |----------------+--------| - | 1 | M | - | 6 | M | - | 13 | F | - +----------------+--------+ - -Example 3: Keep or Ignore the empty field by default -==================================================== - -This example shows deduplicating documents while keeping null values. - -PPL query:: - - os> source=accounts | dedup email keepempty=true | fields account_number, email | sort account_number; - fetched rows / total rows = 4/4 - +----------------+-----------------------+ - | account_number | email | - |----------------+-----------------------| - | 1 | amberduke@pyrami.com | - | 6 | hattiebond@netagy.com | - | 13 | null | - | 18 | daleadams@boink.com | - +----------------+-----------------------+ - - -This example shows deduplicating documents while ignoring null values. - -PPL query:: - - os> source=accounts | dedup email | fields account_number, email | sort account_number; - fetched rows / total rows = 3/3 - +----------------+-----------------------+ - | account_number | email | - |----------------+-----------------------| - | 1 | amberduke@pyrami.com | - | 6 | hattiebond@netagy.com | - | 18 | daleadams@boink.com | - +----------------+-----------------------+ - - -Example 4: Dedup in consecutive document -======================================== - -This example shows deduplicating consecutive documents. - -PPL query:: - - os> source=accounts | dedup gender consecutive=true | fields account_number, gender | sort account_number; - fetched rows / total rows = 3/3 - +----------------+--------+ - | account_number | gender | - |----------------+--------| - | 1 | M | - | 13 | F | - | 18 | M | - +----------------+--------+ - -Limitations -=========== -The ``dedup`` with ``consecutive=true`` command can only work with ``plugins.calcite.enabled=false``. diff --git a/docs/user/ppl/cmd/describe.md b/docs/user/ppl/cmd/describe.md new file mode 100644 index 00000000000..d6efffc9d58 --- /dev/null +++ b/docs/user/ppl/cmd/describe.md @@ -0,0 +1,67 @@ +# describe + +## Description + +Use the `describe` command to query metadata of the index. The `describe` command can only be used as the first command in the PPL query. +## Syntax + +describe [dataSource.][schema.]\<tablename\> +* dataSource: optional. If dataSource is not provided, it resolves to the opensearch dataSource. +* schema: optional. If schema is not provided, it resolves to the default schema. +* tablename: mandatory. The describe command must specify which tablename to query from. + +## Example 1: Fetch all the metadata + +This example describes the accounts index.
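+Because `describe` output flows through the PPL pipeline like any other result set (see Example 2 below), it can also feed aggregations. A minimal sketch (untested) that counts the columns of the index: + +```sql ignore +describe accounts | stats count() as column_count +``` + +The basic, unfiltered form is: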
+ +```ppl +describe accounts +``` + +Expected output: + +```text +fetched rows / total rows = 11/11 ++----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ +| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | +|----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------| +| docTestCluster | null | accounts | account_number | null | bigint | null | null | null | 10 | 2 | null | null | null | null | null | 0 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | firstname | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | address | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 2 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | balance | null | bigint | null | null | null | 10 | 2 | null | null | null | null | null | 3 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | gender | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 4 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | city | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 5 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | employer | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 6 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | state | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 7 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | age | null | bigint | null | null | null | 10 | 2 | null | null | null | null | null | 8 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | email | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 9 | | null | null | null | null | NO | | +| docTestCluster | null | accounts | lastname | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | ++----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ +``` + +## Example 2: Fetch metadata with condition and filter + +This example 
retrieves columns with type bigint in the accounts index. + +```ppl +describe accounts +| where TYPE_NAME="bigint" +| fields COLUMN_NAME +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+ +| COLUMN_NAME | +|----------------| +| account_number | +| balance | +| age | ++----------------+ +``` + +## Example 3: Fetch metadata for table in Prometheus datasource + +See [Fetch metadata for table in Prometheus datasource](../admin/datasources.md) for more context. \ No newline at end of file diff --git a/docs/user/ppl/cmd/describe.rst b/docs/user/ppl/cmd/describe.rst deleted file mode 100644 index 2fbb4003414..00000000000 --- a/docs/user/ppl/cmd/describe.rst +++ /dev/null @@ -1,70 +0,0 @@ -======== -describe -======== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -Use the ``describe`` command to query metadata of the index. ``describe`` command can only be used as the first command in the PPL query. - -Syntax -====== -describe [dataSource.][schema.] - -* dataSource: optional. If dataSource is not provided, it resolves to opensearch dataSource. -* schema: optional. If schema is not provided, it resolves to default schema. -* tablename: mandatory. describe command must specify which tablename to query from. - -Example 1: Fetch all the metadata -================================= - -This example describes the accounts index. - -PPL query:: - - os> describe accounts; - fetched rows / total rows = 11/11 - +----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ - | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | - |----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------| - | docTestCluster | null | accounts | account_number | null | bigint | null | null | null | 10 | 2 | null | null | null | null | null | 0 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | firstname | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | address | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 2 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | balance | null | bigint | null | null | null | 10 | 2 | null | null | null | null | null | 3 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | gender | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 4 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | city | null | 
string | null | null | null | 10 | 2 | null | null | null | null | null | 5 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | employer | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 6 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | state | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 7 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | age | null | bigint | null | null | null | 10 | 2 | null | null | null | null | null | 8 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | email | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 9 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | lastname | null | string | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | - +----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ - -Example 2: Fetch metadata with condition and filter -=================================================== - -This example retrieves columns with type bigint in the accounts index. - -PPL query:: - - os> describe accounts | where TYPE_NAME="bigint" | fields COLUMN_NAME; - fetched rows / total rows = 3/3 - +----------------+ - | COLUMN_NAME | - |----------------| - | account_number | - | balance | - | age | - +----------------+ - - -Example 3: Fetch metadata for table in Prometheus datasource -============================================================ - -See `Fetch metadata for table in Prometheus datasource <../admin/datasources.rst>`_ for more context. diff --git a/docs/user/ppl/cmd/eval.md b/docs/user/ppl/cmd/eval.md new file mode 100644 index 00000000000..d3300cd6b01 --- /dev/null +++ b/docs/user/ppl/cmd/eval.md @@ -0,0 +1,132 @@ +# eval + +## Description + +The `eval` command evaluates the expression and appends the result to the search result. +## Syntax + +eval \<field\>=\<expression\> ["," \<field\>=\<expression\> ]... +* field: mandatory. If the field name does not exist, a new field is added. If the field name already exists, it will be overridden. +* expression: mandatory. Any expression supported by the system. + +## Example 1: Create a new field + +This example shows creating a new field doubleAge for each document. The new doubleAge field is the result of multiplying age by 2. + +```ppl +source=accounts +| eval doubleAge = age * 2 +| fields age, doubleAge +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----+-----------+ +| age | doubleAge | +|-----+-----------| +| 32 | 64 | +| 36 | 72 | +| 28 | 56 | +| 33 | 66 | ++-----+-----------+ +``` + +## Example 2: Override an existing field + +This example shows overriding the existing age field by adding 1 to it. + +```ppl +source=accounts +| eval age = age + 1 +| fields age +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----+ +| age | +|-----| +| 33 | +| 37 | +| 29 | +| 34 | ++-----+ +``` + +## Example 3: Create a new field with field defined in eval + +This example shows creating a new field ddAge using a field defined in the same eval command.
The new field ddAge is the result of multiplying doubleAge by 2, where doubleAge is defined in the same eval command. + +```ppl +source=accounts +| eval doubleAge = age * 2, ddAge = doubleAge * 2 +| fields age, doubleAge, ddAge +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----+-----------+-------+ +| age | doubleAge | ddAge | +|-----+-----------+-------| +| 32 | 64 | 128 | +| 36 | 72 | 144 | +| 28 | 56 | 112 | +| 33 | 66 | 132 | ++-----+-----------+-------+ +``` + +## Example 4: String concatenation + +This example shows using the + operator for string concatenation. You can concatenate string literals and field values. + +```ppl +source=accounts +| eval greeting = 'Hello ' + firstname +| fields firstname, greeting +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+---------------+ +| firstname | greeting | +|-----------+---------------| +| Amber | Hello Amber | +| Hattie | Hello Hattie | +| Nanette | Hello Nanette | +| Dale | Hello Dale | ++-----------+---------------+ +``` + +## Example 5: Multiple string concatenation with type casting + +This example shows multiple concatenations with type casting from numeric to string. + +```ppl +source=accounts +| eval full_info = 'Name: ' + firstname + ', Age: ' + CAST(age AS STRING) +| fields firstname, age, full_info +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+------------------------+ +| firstname | age | full_info | +|-----------+-----+------------------------| +| Amber | 32 | Name: Amber, Age: 32 | +| Hattie | 36 | Name: Hattie, Age: 36 | +| Nanette | 28 | Name: Nanette, Age: 28 | +| Dale | 33 | Name: Dale, Age: 33 | ++-----------+-----+------------------------+ +``` + +## Limitations + +The `eval` command is not rewritten to OpenSearch DSL; it is only executed on the coordination node. \ No newline at end of file diff --git a/docs/user/ppl/cmd/eval.rst b/docs/user/ppl/cmd/eval.rst deleted file mode 100644 index ada7b179526..00000000000 --- a/docs/user/ppl/cmd/eval.rst +++ /dev/null @@ -1,120 +0,0 @@ -==== -eval -==== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -The ``eval`` command evaluates the expression and appends the result to the search result. - -Syntax -====== -eval = ["," = ]... - -* field: mandatory. If the field name does not exist, a new field is added. If the field name already exists, it will be overridden. -* expression: mandatory. Any expression supported by the system. - -Example 1: Create a new field -============================= - -This example shows creating a new field doubleAge for each document. The new doubleAge field is the result of multiplying age by 2. - -PPL query:: - - os> source=accounts | eval doubleAge = age * 2 | fields age, doubleAge ; - fetched rows / total rows = 4/4 - +-----+-----------+ - | age | doubleAge | - |-----+-----------| - | 32 | 64 | - | 36 | 72 | - | 28 | 56 | - | 33 | 66 | - +-----+-----------+ - - -Example 2: Override an existing field -===================================== - -This example shows overriding the existing age field by adding 1 to it.
- -PPL query:: - - os> source=accounts | eval age = age + 1 | fields age ; - fetched rows / total rows = 4/4 - +-----+ - | age | - |-----| - | 33 | - | 37 | - | 29 | - | 34 | - +-----+ - -Example 3: Create a new field with field defined in eval -======================================================== - -This example shows creating a new field ddAge using a field defined in the same eval command. The new field ddAge is the result of multiplying doubleAge by 2, where doubleAge is defined in the same eval command. - -PPL query:: - - os> source=accounts | eval doubleAge = age * 2, ddAge = doubleAge * 2 | fields age, doubleAge, ddAge ; - fetched rows / total rows = 4/4 - +-----+-----------+-------+ - | age | doubleAge | ddAge | - |-----+-----------+-------| - | 32 | 64 | 128 | - | 36 | 72 | 144 | - | 28 | 56 | 112 | - | 33 | 66 | 132 | - +-----+-----------+-------+ - -Example 4: String concatenation -=============================== - -This example shows using the + operator for string concatenation. You can concatenate string literals and field values. - -PPL query:: - - source=accounts | eval greeting = 'Hello ' + firstname | fields firstname, greeting - -Expected result:: - - +---------------+---------------------+ - | firstname | greeting | - |---------------+---------------------| - | Amber JOHnny | Hello Amber JOHnny | - | Hattie | Hello Hattie | - | Nanette | Hello Nanette | - | Dale | Hello Dale | - +---------------+---------------------+ - -Example 5: Multiple string concatenation with type casting -========================================================== - -This example shows multiple concatenations with type casting from numeric to string. - -PPL query:: - - source=accounts | eval full_info = 'Name: ' + firstname + ', Age: ' + CAST(age AS STRING) | fields firstname, age, full_info - -Expected result:: - - +---------------+-----+-------------------------------+ - | firstname | age | full_info | - |---------------+-----+-------------------------------| - | Amber JOHnny | 32 | Name: Amber JOHnny, Age: 32 | - | Hattie | 36 | Name: Hattie, Age: 36 | - | Nanette | 28 | Name: Nanette, Age: 28 | - | Dale | 33 | Name: Dale, Age: 33 | - +---------------+-----+-------------------------------+ - -Limitations -=========== -The ``eval`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. diff --git a/docs/user/ppl/cmd/eventstats.md b/docs/user/ppl/cmd/eventstats.md new file mode 100644 index 00000000000..1cb791d95e9 --- /dev/null +++ b/docs/user/ppl/cmd/eventstats.md @@ -0,0 +1,166 @@ +# eventstats + +## Description + +The `eventstats` command enriches your event data with calculated summary statistics. It operates by analyzing specified fields within your events, computing various statistical measures, and then appending these results as new fields to each original event. +Key aspects of `eventstats`: +1. It performs calculations across the entire result set or within defined groups. +2. The original events remain intact, with new fields added to contain the statistical results. +3. The command is particularly useful for comparative analysis, identifying outliers, or providing additional context to individual events. + +Difference between `stats` and `eventstats`: +The `stats` and `eventstats` commands are both used for calculating statistics, but they have some key differences in how they operate and what they produce: +* Output Format + * `stats`: Produces a summary table with only the calculated statistics.
* `eventstats`: Adds the calculated statistics as new fields to the existing events, preserving the original data. +* Event Retention + * `stats`: Reduces the result set to only the statistical summary, discarding individual events. + * `eventstats`: Retains all original events and adds new fields with the calculated statistics. +* Use Cases + * `stats`: Best for creating summary reports or dashboards. Often used as a final command to summarize results. + * `eventstats`: Useful when you need to enrich events with statistical context for further analysis or filtering. It can be used mid-search to add statistics that can be used in subsequent commands. + +## Syntax + +eventstats [bucket_nullable=bool] \<function\>... [by-clause] +* function: mandatory. An aggregation function or window function. +* bucket_nullable: optional. Controls whether the eventstats command considers null buckets as a valid group in group-by aggregations. When set to `false`, it will not treat null group-by values as a distinct group during aggregation. **Default:** Determined by `plugins.ppl.syntax.legacy.preferred`. + * When `plugins.ppl.syntax.legacy.preferred=true`, `bucket_nullable` defaults to `true` + * When `plugins.ppl.syntax.legacy.preferred=false`, `bucket_nullable` defaults to `false` +* by-clause: optional. Groups results by specified fields or expressions. Syntax: by [span-expression,] [field,]... **Default:** aggregation over the entire result set. +* span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). For example, `span(age, 10)` creates 10-year age buckets and `span(timestamp, 1h)` creates hourly buckets. + * Available time units: + * millisecond (ms) + * second (s) + * minute (m, case sensitive) + * hour (h) + * day (d) + * week (w) + * month (M, case sensitive) + * quarter (q) + * year (y) + +## Aggregation Functions + +The eventstats command supports the following aggregation functions: +* COUNT: Count of values +* SUM: Sum of numeric values +* AVG: Average of numeric values +* MAX: Maximum value +* MIN: Minimum value +* VAR_SAMP: Sample variance +* VAR_POP: Population variance +* STDDEV_SAMP: Sample standard deviation +* STDDEV_POP: Population standard deviation +* DISTINCT_COUNT/DC: Distinct count of values +* EARLIEST: Earliest value by timestamp +* LATEST: Latest value by timestamp + +For detailed documentation of each function, see [Aggregation Functions](../functions/aggregations.md). +## Usage + +Eventstats + +```sql ignore +source = table | eventstats avg(a) +source = table | where a < 50 | eventstats count(c) +source = table | eventstats min(c), max(c) by b +source = table | eventstats count(c) as count_by by b | where count_by > 1000 +source = table | eventstats dc(field) as distinct_count +source = table | eventstats distinct_count(category) by region +``` + +## Example 1: Calculate the average, sum and count of a field by group + +This example shows calculating the average age, sum of age, and count of events for all accounts grouped by gender.
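+For comparison, a minimal sketch (untested, assuming the same `accounts` index) of the equivalent `stats` query, which would collapse the output to one summary row per gender and discard the individual accounts: + +```sql ignore +source=accounts | stats avg(age), sum(age), count() by gender +``` + +The `eventstats` version, which keeps every event, is: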
+ +```ppl +source=accounts +| fields account_number, gender, age +| eventstats avg(age), sum(age), count() by gender +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+--------+-----+--------------------+----------+---------+ +| account_number | gender | age | avg(age) | sum(age) | count() | +|----------------+--------+-----+--------------------+----------+---------| +| 1 | M | 32 | 33.666666666666664 | 101 | 3 | +| 6 | M | 36 | 33.666666666666664 | 101 | 3 | +| 13 | F | 28 | 28.0 | 28 | 1 | +| 18 | M | 33 | 33.666666666666664 | 101 | 3 | ++----------------+--------+-----+--------------------+----------+---------+ +``` + +## Example 2: Calculate the count by a gender and span + +This example shows counting events by age intervals of 5 years, grouped by gender. + +```ppl +source=accounts +| fields account_number, gender, age +| eventstats count() as cnt by span(age, 5) as age_span, gender +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+--------+-----+-----+ +| account_number | gender | age | cnt | +|----------------+--------+-----+-----| +| 1 | M | 32 | 2 | +| 6 | M | 36 | 1 | +| 13 | F | 28 | 1 | +| 18 | M | 33 | 2 | ++----------------+--------+-----+-----+ +``` + +## Example 3: Null buckets handling + +```ppl +source=accounts +| eventstats bucket_nullable=false count() as cnt by employer +| fields account_number, firstname, employer, cnt +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----------+----------+------+ +| account_number | firstname | employer | cnt | +|----------------+-----------+----------+------| +| 1 | Amber | Pyrami | 1 | +| 6 | Hattie | Netagy | 1 | +| 13 | Nanette | Quility | 1 | +| 18 | Dale | null | null | ++----------------+-----------+----------+------+ +``` + +```ppl +source=accounts +| eventstats bucket_nullable=true count() as cnt by employer +| fields account_number, firstname, employer, cnt +| sort account_number +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----------+----------+-----+ +| account_number | firstname | employer | cnt | +|----------------+-----------+----------+-----| +| 1 | Amber | Pyrami | 1 | +| 6 | Hattie | Netagy | 1 | +| 13 | Nanette | Quility | 1 | +| 18 | Dale | null | 1 | ++----------------+-----------+----------+-----+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/cmd/eventstats.rst b/docs/user/ppl/cmd/eventstats.rst deleted file mode 100644 index cf4ac0d9b02..00000000000 --- a/docs/user/ppl/cmd/eventstats.rst +++ /dev/null @@ -1,162 +0,0 @@ -========== -eventstats -========== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``eventstats`` command enriches your event data with calculated summary statistics. It operates by analyzing specified fields within your events, computing various statistical measures, and then appending these results as new fields to each original event. - -| Key aspects of `eventstats`: - -1. It performs calculations across the entire result set or within defined groups. -2. The original events remain intact, with new fields added to contain the statistical results. -3. The command is particularly useful for comparative analysis, identifying outliers, or providing additional context to individual events. 
- -| Difference between ``stats`` and ``eventstats`` -The ``stats`` and ``eventstats`` commands are both used for calculating statistics, but they have some key differences in how they operate and what they produce: - -* Output Format - - * ``stats``: Produces a summary table with only the calculated statistics. - * ``eventstats``: Adds the calculated statistics as new fields to the existing events, preserving the original data. - -* Event Retention - - * ``stats``: Reduces the result set to only the statistical summary, discarding individual events. - * ``eventstats``: Retains all original events and adds new fields with the calculated statistics. - -* Use Cases - - * ``stats``: Best for creating summary reports or dashboards. Often used as a final command to summarize results. - * ``eventstats``: Useful when you need to enrich events with statistical context for further analysis or filtering. Can be used mid-search to add statistics that can be used in subsequent commands. - - -Syntax -====== -eventstats [bucket_nullable=bool] ... [by-clause] - -* function: mandatory. An aggregation function or window function. -* bucket_nullable: optional. Controls whether the eventstats command consider null buckets as a valid group in group-by aggregations. When set to ``false``, it will not treat null group-by values as a distinct group during aggregation. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. - - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false`` - -* by-clause: optional. Groups results by specified fields or expressions. Syntax: by [span-expression,] [field,]... **Default:** aggregation over the entire result set. -* span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). For example, ``span(age, 10)`` creates 10-year age buckets, ``span(timestamp, 1h)`` creates hourly buckets. - - * Available time units: - - * millisecond (ms) - * second (s) - * minute (m, case sensitive) - * hour (h) - * day (d) - * week (w) - * month (M, case sensitive) - * quarter (q) - * year (y) - -Aggregation Functions -===================== - -The eventstats command supports the following aggregation functions: - -* COUNT: Count of values -* SUM: Sum of numeric values -* AVG: Average of numeric values -* MAX: Maximum value -* MIN: Minimum value -* VAR_SAMP: Sample variance -* VAR_POP: Population variance -* STDDEV_SAMP: Sample standard deviation -* STDDEV_POP: Population standard deviation -* DISTINCT_COUNT/DC: Distinct count of values -* EARLIEST: Earliest value by timestamp -* LATEST: Latest value by timestamp - -For detailed documentation of each function, see `Aggregation Functions <../functions/aggregations.rst>`_. - -Usage -===== - -Eventstats:: - - source = table | eventstats avg(a) - source = table | where a < 50 | eventstats count(c) - source = table | eventstats min(c), max(c) by b - source = table | eventstats count(c) as count_by by b | where count_by > 1000 - source = table | eventstats dc(field) as distinct_count - source = table | eventstats distinct_count(category) by region - - -Example 1: Calculate the average, sum and count of a field by group -=================================================================== - -This example shows calculating the average age, sum of age, and count of events for all accounts grouped by gender. 
- -PPL query:: - - os> source=accounts | fields account_number, gender, age | eventstats avg(age), sum(age), count() by gender | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+--------------------+----------+---------+ - | account_number | gender | age | avg(age) | sum(age) | count() | - |----------------+--------+-----+--------------------+----------+---------| - | 1 | M | 32 | 33.666666666666664 | 101 | 3 | - | 6 | M | 36 | 33.666666666666664 | 101 | 3 | - | 13 | F | 28 | 28.0 | 28 | 1 | - | 18 | M | 33 | 33.666666666666664 | 101 | 3 | - +----------------+--------+-----+--------------------+----------+---------+ - -Example 2: Calculate the count by a gender and span -=================================================== - -This example shows counting events by age intervals of 5 years, grouped by gender. - -PPL query:: - - os> source=accounts | fields account_number, gender, age | eventstats count() as cnt by span(age, 5) as age_span, gender | sort account_number; - fetched rows / total rows = 4/4 - +----------------+--------+-----+-----+ - | account_number | gender | age | cnt | - |----------------+--------+-----+-----| - | 1 | M | 32 | 2 | - | 6 | M | 36 | 1 | - | 13 | F | 28 | 1 | - | 18 | M | 33 | 2 | - +----------------+--------+-----+-----+ - -Example 3: Null buckets handling -================================ - -PPL query:: - - os> source=accounts | eventstats bucket_nullable=false count() as cnt by employer | fields account_number, firstname, employer, cnt | sort account_number; - fetched rows / total rows = 4/4 - +----------------+-----------+----------+------+ - | account_number | firstname | employer | cnt | - |----------------+-----------+----------+------| - | 1 | Amber | Pyrami | 1 | - | 6 | Hattie | Netagy | 1 | - | 13 | Nanette | Quility | 1 | - | 18 | Dale | null | null | - +----------------+-----------+----------+------+ - -PPL query:: - - os> source=accounts | eventstats bucket_nullable=true count() as cnt by employer | fields account_number, firstname, employer, cnt | sort account_number; - fetched rows / total rows = 4/4 - +----------------+-----------+----------+-----+ - | account_number | firstname | employer | cnt | - |----------------+-----------+----------+-----| - | 1 | Amber | Pyrami | 1 | - | 6 | Hattie | Netagy | 1 | - | 13 | Nanette | Quility | 1 | - | 18 | Dale | null | 1 | - +----------------+-----------+----------+-----+ diff --git a/docs/user/ppl/cmd/expand.md b/docs/user/ppl/cmd/expand.md new file mode 100644 index 00000000000..8fddbea7ad7 --- /dev/null +++ b/docs/user/ppl/cmd/expand.md @@ -0,0 +1,50 @@ +# expand + +## Description + +The `expand` command transforms a single document with a nested array field into multiple documents—each containing one element from the array. All other fields in the original document are duplicated across the resulting documents. +Key aspects of `expand`: +* It generates one row per element in the specified array field. +* The specified array field is converted into individual rows. +* If an alias is provided, the expanded values appear under the alias instead of the original field name. +* If the specified field is an empty array, the row is retained with the expanded field set to null. + +## Syntax + +expand \<field\> [as alias] +* field: mandatory. The field to be expanded (exploded). Currently only nested arrays are supported. +* alias: optional. The name to use instead of the original field name.
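+A minimal sketch of the empty-array behavior described above (untested; `table` and the nested-array field `address` are placeholders): + +```sql ignore +source = table | expand address +``` + +A document whose `address` array is empty surfaces as a single row with `address` set to null rather than being dropped.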
 + +## Example 1: Expand address field with an alias + +Given a dataset `migration` with the following data: + +```text +{"name":"abbas","age":24,"address":[{"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}}]} +{"name":"chen","age":32,"address":[{"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}},{"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}}]} + +``` + +The following query expands the address field and renames it to addr: + +```ppl +source=migration +| expand address as addr +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-------+-----+-------------------------------------------------------------------------------------------+ +| name | age | addr | +|-------+-----+-------------------------------------------------------------------------------------------| +| abbas | 24 | {"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}} | +| chen | 32 | {"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}} | +| chen | 32 | {"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}} | ++-------+-----+-------------------------------------------------------------------------------------------+ +``` + +## Limitations + +* The `expand` command currently only supports nested arrays. Primitive fields storing arrays are not supported. E.g. a string field storing an array of strings cannot be expanded with the current implementation. \ No newline at end of file diff --git a/docs/user/ppl/cmd/expand.rst b/docs/user/ppl/cmd/expand.rst deleted file mode 100644 index c8065a2da0f..00000000000 --- a/docs/user/ppl/cmd/expand.rst +++ /dev/null @@ -1,61 +0,0 @@ -====== -expand -====== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``expand`` command transforms a single document with a nested array field into multiple documents—each containing one element from the array. All other fields in the original document are duplicated across the resulting documents. - -| Key aspects of ``expand``: - -* It generates one row per element in the specified array field. -* The specified array field is converted into individual rows. -* If an alias is provided, the expanded values appear under the alias instead of the original field name. -* If the specified field is an empty array, the row is retained with the expanded field set to null. - -Syntax -====== - -expand [as alias] - -* field: mandatory. The field to be expanded (exploded). Currently only nested arrays are supported. -* alias: optional. The name to use instead of the original field name. - - -Example 1: Expand address field with an alias -============================================= - -Given a dataset ``migration`` with the following data: - -..
code-block:: - - {"name":"abbas","age":24,"address":[{"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}}]} - {"name":"chen","age":32,"address":[{"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}},{"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}}]} - -The following query expand the address field and rename it to addr: - -PPL query:: - - PPL> source=migration | expand address as addr; - fetched rows / total rows = 3/3 - +-------+-----+-------------------------------------------------------------------------------------------+ - | name | age | addr | - |-------+-----+-------------------------------------------------------------------------------------------| - | abbas | 24 | {"city":"New york city","state":"NY","moveInDate":{"dateAndTime":"19840412T090742.000Z"}} | - | chen | 32 | {"city":"Miami","state":"Florida","moveInDate":{"dateAndTime":"19010811T040333.000Z"}} | - | chen | 32 | {"city":"los angeles","state":"CA","moveInDate":{"dateAndTime":"20230503T080742.000Z"}} | - +-------+-----+-------------------------------------------------------------------------------------------+ - -Limitations -=========== - -* The ``expand`` command currently only supports nested arrays. Primitive - fields storing arrays are not supported. E.g. a string field storing an array - of strings cannot be expanded with the current implementation. diff --git a/docs/user/ppl/cmd/explain.md b/docs/user/ppl/cmd/explain.md new file mode 100644 index 00000000000..fb60a3b1207 --- /dev/null +++ b/docs/user/ppl/cmd/explain.md @@ -0,0 +1,181 @@ +# explain + +## Description + +The `explain` command explains the execution plan of a query, which is often used for query translation and troubleshooting. The `explain` command can only be used as the first command in the PPL query. +## Syntax + +explain [mode] queryStatement +* mode: optional. There are 4 explain modes: "simple", "standard", "cost", "extended". **Default:** standard. + * standard: The default mode. Display logical and physical plan with pushdown information (DSL). + * simple: Display the logical plan tree without attributes. + * cost: Display the standard information plus plan cost attributes. + * extended: Display the standard information plus generated code. +* queryStatement: mandatory. A PPL query to explain. + +## Example 1: Explain a PPL query in v2 engine + +When Calcite is disabled (plugins.calcite.enabled=false), explaining a PPL query returns its v2 engine physical plan and pushdown information.
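+Because the mode argument defaults to standard, the explicit form is equivalent; a minimal sketch (untested): + +```sql ignore +explain standard source=state_country | where country = 'USA' OR country = 'England' | stats count() by country +``` + +The default form is: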
+ +```ppl +explain source=state_country +| where country = 'USA' OR country = 'England' +| stats count() by country +``` + +Explain: + +```json +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[count(), country]" + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": """OpenSearchQueryRequest(indexName=state_country, sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"should":[{"term":{"country":{"value":"USA","boost":1.0}}},{"term":{"country":{"value":"England","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)""" + }, + "children": [] + } + ] + } +} +``` + +## Example 2: Explain a PPL query in v3 engine + +When Calcite is enabled (plugins.calcite.enabled=true), explaining a PPL query will get its logical and physical plan of v3 engine and pushdown information. + +```ppl +explain source=state_country +| where country = 'USA' OR country = 'England' +| stats count() by country +``` + +Explain + +```json +{ + "calcite": { + "logical": """LogicalProject(count()=[$1], country=[$0]) + LogicalAggregate(group=[{1}], count()=[COUNT()]) + LogicalFilter(condition=[SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]) +""", + "physical": """EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], country=[$t0]) + CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[FILTER->SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7)), AGGREGATION->rel#53:LogicalAggregate.NONE.[](input=RelSubset#43,group={1},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"terms":{"country":["England","USA"],"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +""" + } +} +``` + +## Example 3: Explain a PPL query with simple mode + +When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query with the "simple" mode. + +```ppl +explain simple source=state_country +| where country = 'USA' OR country = 'England' +| stats count() by country +``` + +Explain + +``` +{ + "calcite": { + "logical": """LogicalProject + LogicalAggregate + LogicalFilter + CalciteLogicalIndexScan +""" + } +} +``` + +## Example 4: Explain a PPL query with cost mode + +When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query with the "cost" mode. 
+ +```ppl +explain cost source=state_country +| where country = 'USA' OR country = 'England' +| stats count() by country +``` + +Explain + +```json +{ + "calcite": { + "logical": """LogicalProject(count()=[$1], country=[$0]): rowcount = 2.5, cumulative cost = {130.3125 rows, 206.0 cpu, 0.0 io}, id = 75 + LogicalAggregate(group=[{1}], count()=[COUNT()]): rowcount = 2.5, cumulative cost = {127.8125 rows, 201.0 cpu, 0.0 io}, id = 74 + LogicalFilter(condition=[SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7))]): rowcount = 25.0, cumulative cost = {125.0 rows, 201.0 cpu, 0.0 io}, id = 73 + CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 72 +""", + "physical": """EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], country=[$t0]): rowcount = 100.0, cumulative cost = {200.0 rows, 501.0 cpu, 0.0 io}, id = 138 + CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[FILTER->SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7)), AGGREGATION->rel#125:LogicalAggregate.NONE.[](input=RelSubset#115,group={1},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"terms":{"country":["England","USA"],"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 133 +""" + } +} +``` + +## Example 5: Explain a PPL query with extended mode + +```ppl +explain extended source=state_country +| where country = 'USA' OR country = 'England' +| stats count() by country +``` + +Explain + +```json +{ + "calcite": { + "logical": """LogicalProject(count()=[$1], country=[$0]) + LogicalAggregate(group=[{1}], count()=[COUNT()]) + LogicalFilter(condition=[SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7))]) + CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]) +""", + "physical": """EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], country=[$t0]) + CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[FILTER->SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7)), AGGREGATION->rel#193:LogicalAggregate.NONE.[](input=RelSubset#183,group={1},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"terms":{"country":["England","USA"],"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) +""", + "extended": """public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) { + final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get("v1stashed"); + final org.apache.calcite.linq4j.Enumerable _inputEnumerable = v1stashed.scan(); + return new org.apache.calcite.linq4j.AbstractEnumerable(){ + public org.apache.calcite.linq4j.Enumerator enumerator() { + return new org.apache.calcite.linq4j.Enumerator(){ + public final org.apache.calcite.linq4j.Enumerator inputEnumerator = 
_inputEnumerable.enumerator(); + public void reset() { + inputEnumerator.reset(); + } + public boolean moveNext() { + return inputEnumerator.moveNext(); + } + public void close() { + inputEnumerator.close(); + } + public Object current() { + final Object[] current = (Object[]) inputEnumerator.current(); + final Object input_value = current[1]; + final Object input_value0 = current[0]; + return new Object[] { + input_value, + input_value0}; + } + }; + } + }; +} +public Class getElementType() { + return java.lang.Object[].class; +} +""" + } +} +``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/explain.rst b/docs/user/ppl/cmd/explain.rst deleted file mode 100644 index fb14dfd39f9..00000000000 --- a/docs/user/ppl/cmd/explain.rst +++ /dev/null @@ -1,190 +0,0 @@ -======= -explain -======= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -The ``explain`` command explains the plan of query which is often used for query translation and troubleshooting. The ``explain`` command can only be used as the first command in the PPL query. - -Syntax -====== -explain queryStatement - -* mode: optional. There are 4 explain modes: "simple", "standard", "cost", "extended". **Default:** standard. - - * standard: The default mode. Display logical and physical plan with pushdown information (DSL). - * simple: Display the logical plan tree without attributes. - * cost: Display the standard information plus plan cost attributes. - * extended: Display the standard information plus generated code. - -* queryStatement: mandatory. A PPL query to explain. - - - -Example 1: Explain a PPL query in v2 engine -=========================================== -When Calcite is disabled (plugins.calcite.enabled=false), explaining a PPL query will get its physical plan of v2 engine and pushdown information. - -PPL query:: - - PPL> explain source=state_country | where country = 'USA' OR country = 'England' | stats count() by country - -Explain:: - - { - "root": { - "name": "ProjectOperator", - "description": { - "fields": "[count(), country]" - }, - "children": [ - { - "name": "OpenSearchIndexScan", - "description": { - "request": """OpenSearchQueryRequest(indexName=state_country, sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"should":[{"term":{"country":{"value":"USA","boost":1.0}}},{"term":{"country":{"value":"England","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)""" - }, - "children": [] - } - ] - } - } - -Example 2: Explain a PPL query in v3 engine -=========================================== - -When Calcite is enabled (plugins.calcite.enabled=true), explaining a PPL query will get its logical and physical plan of v3 engine and pushdown information. 
- -PPL query:: - - PPL> explain source=state_country | where country = 'USA' OR country = 'England' | stats count() by country - -Explain:: - - { - "calcite": { - "logical": """LogicalProject(count()=[$1], country=[$0]) - LogicalAggregate(group=[{1}], count()=[COUNT()]) - LogicalFilter(condition=[SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7))]) - CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]) - """, - "physical": """EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], country=[$t0]) - CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[FILTER->SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7)), AGGREGATION->rel#53:LogicalAggregate.NONE.[](input=RelSubset#43,group={1},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"terms":{"country":["England","USA"],"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - """ - } - } - - -Example 3: Explain a PPL query with simple mode -=============================================== - -When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query with the "simple" mode. - -PPL query:: - - PPL> explain simple source=state_country | where country = 'USA' OR country = 'England' | stats count() by country - -Explain:: - - { - "calcite": { - "logical": """LogicalProject - LogicalAggregate - LogicalFilter - CalciteLogicalIndexScan - """ - } - } - -Example 4: Explain a PPL query with cost mode -============================================= - -When Calcite is enabled (plugins.calcite.enabled=true), you can explain a PPL query with the "cost" mode. 
- -PPL query:: - - PPL> explain cost source=state_country | where country = 'USA' OR country = 'England' | stats count() by country - -Explain:: - - { - "calcite": { - "logical": """LogicalProject(count()=[$1], country=[$0]): rowcount = 2.5, cumulative cost = {130.3125 rows, 206.0 cpu, 0.0 io}, id = 75 - LogicalAggregate(group=[{1}], count()=[COUNT()]): rowcount = 2.5, cumulative cost = {127.8125 rows, 201.0 cpu, 0.0 io}, id = 74 - LogicalFilter(condition=[SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7))]): rowcount = 25.0, cumulative cost = {125.0 rows, 201.0 cpu, 0.0 io}, id = 73 - CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 72 - """, - "physical": """EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], country=[$t0]): rowcount = 100.0, cumulative cost = {200.0 rows, 501.0 cpu, 0.0 io}, id = 138 - CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[FILTER->SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7)), AGGREGATION->rel#125:LogicalAggregate.NONE.[](input=RelSubset#115,group={1},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"terms":{"country":["England","USA"],"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 133 - """ - } - } - -Example 5: Explain a PPL query with extended mode -================================================= - -PPL query:: - - PPL> explain extended source=state_country | where country = 'USA' OR country = 'England' | stats count() by country - -Explain:: - - { - "calcite": { - "logical": """LogicalProject(count()=[$1], country=[$0]) - LogicalAggregate(group=[{1}], count()=[COUNT()]) - LogicalFilter(condition=[SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7))]) - CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]) - """, - "physical": """EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], country=[$t0]) - CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[FILTER->SEARCH($1, Sarg['England', 'USA':CHAR(7)]:CHAR(7)), AGGREGATION->rel#193:LogicalAggregate.NONE.[](input=RelSubset#183,group={1},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"terms":{"country":["England","USA"],"boost":1.0}},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"country":{"terms":{"field":"country","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) - """, - "extended": """public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) { - final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get("v1stashed"); - final org.apache.calcite.linq4j.Enumerable _inputEnumerable = v1stashed.scan(); - return new org.apache.calcite.linq4j.AbstractEnumerable(){ - public org.apache.calcite.linq4j.Enumerator enumerator() { - return new org.apache.calcite.linq4j.Enumerator(){ - public final 
org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();
-              public void reset() {
-                inputEnumerator.reset();
-              }
-
-              public boolean moveNext() {
-                return inputEnumerator.moveNext();
-              }
-
-              public void close() {
-                inputEnumerator.close();
-              }
-
-              public Object current() {
-                final Object[] current = (Object[]) inputEnumerator.current();
-                final Object input_value = current[1];
-                final Object input_value0 = current[0];
-                return new Object[] {
-                    input_value,
-                    input_value0};
-              }
-
-            };
-          }
-
-        };
-      }
-
-
-      public Class getElementType() {
-        return java.lang.Object[].class;
-      }
-
-
-    """
-    }
-  }
diff --git a/docs/user/ppl/cmd/fields.md b/docs/user/ppl/cmd/fields.md
new file mode 100644
index 00000000000..507a8e6903f
--- /dev/null
+++ b/docs/user/ppl/cmd/fields.md
@@ -0,0 +1,244 @@
# fields

## Description

The `fields` command keeps or removes fields from the search result.

## Syntax

fields [+|-] <field-list>

* +|-: optional. If the plus (+) is used, only the fields specified in the field list will be kept. If the minus (-) is used, all the fields specified in the field list will be removed. **Default:** +.
* field-list: mandatory. Comma-delimited or space-delimited list of fields to keep or remove. Supports wildcard patterns.

## Example 1: Select specified fields from result

This example shows selecting the account_number, firstname and lastname fields from search results.

```ppl
source=accounts
| fields account_number, firstname, lastname
```

Expected output:

```text
fetched rows / total rows = 4/4
+----------------+-----------+----------+
| account_number | firstname | lastname |
|----------------+-----------+----------|
| 1              | Amber     | Duke     |
| 6              | Hattie    | Bond     |
| 13             | Nanette   | Bates    |
| 18             | Dale      | Adams    |
+----------------+-----------+----------+
```

## Example 2: Remove specified fields from result

This example shows removing the account_number field from search results.

```ppl
source=accounts
| fields account_number, firstname, lastname
| fields - account_number
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------+----------+
| firstname | lastname |
|-----------+----------|
| Amber     | Duke     |
| Hattie    | Bond     |
| Nanette   | Bates    |
| Dale      | Adams    |
+-----------+----------+
```

## Example 3: Space-delimited field selection

Fields can be specified using spaces instead of commas, providing a more concise syntax.

**Syntax**: `fields field1 field2 field3`

```ppl
source=accounts
| fields firstname lastname age
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------+----------+-----+
| firstname | lastname | age |
|-----------+----------+-----|
| Amber     | Duke     | 32  |
| Hattie    | Bond     | 36  |
| Nanette   | Bates    | 28  |
| Dale      | Adams    | 33  |
+-----------+----------+-----+
```

## Example 4: Prefix wildcard pattern

Select fields starting with a pattern using prefix wildcards.

```ppl
source=accounts
| fields account*
```

Expected output:

```text
fetched rows / total rows = 4/4
+----------------+
| account_number |
|----------------|
| 1              |
| 6              |
| 13             |
| 18             |
+----------------+
```

## Example 5: Suffix wildcard pattern

Select fields ending with a pattern using suffix wildcards.
+ +```ppl +source=accounts +| fields *name +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+----------+ +| firstname | lastname | +|-----------+----------| +| Amber | Duke | +| Hattie | Bond | +| Nanette | Bates | +| Dale | Adams | ++-----------+----------+ +``` + +## Example 6: Contains wildcard pattern + +Select fields containing a pattern using contains wildcards. + +```ppl +source=accounts +| fields *a* +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+-----------+-----------------+---------+-------+-----+----------------------+----------+ +| account_number | firstname | address | balance | state | age | email | lastname | +|----------------+-----------+-----------------+---------+-------+-----+----------------------+----------| +| 1 | Amber | 880 Holmes Lane | 39225 | IL | 32 | amberduke@pyrami.com | Duke | ++----------------+-----------+-----------------+---------+-------+-----+----------------------+----------+ +``` + +## Example 7: Mixed delimiter syntax + +Combine spaces and commas for flexible field specification. + +```ppl +source=accounts +| fields firstname, account* *name +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+----------------+----------+ +| firstname | account_number | lastname | +|-----------+----------------+----------| +| Amber | 1 | Duke | +| Hattie | 6 | Bond | +| Nanette | 13 | Bates | +| Dale | 18 | Adams | ++-----------+----------------+----------+ +``` + +## Example 8: Field deduplication + +Automatically prevents duplicate columns when wildcards expand to already specified fields. + +```ppl +source=accounts +| fields firstname, *name +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+----------+ +| firstname | lastname | +|-----------+----------| +| Amber | Duke | +| Hattie | Bond | +| Nanette | Bates | +| Dale | Adams | ++-----------+----------+ +``` + +Note: Even though `firstname` is explicitly specified and would also match `*name`, it appears only once due to automatic deduplication. +## Example 9: Full wildcard selection + +Select all available fields using `*` or `` `*` ``. This selects all fields defined in the index schema, including fields that may contain null values. + +```ppl +source=accounts +| fields `*` +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+-----------+-----------------+---------+--------+--------+----------+-------+-----+----------------------+----------+ +| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | +|----------------+-----------+-----------------+---------+--------+--------+----------+-------+-----+----------------------+----------| +| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | ++----------------+-----------+-----------------+---------+--------+--------+----------+-------+-----+----------------------+----------+ +``` + +Note: The `*` wildcard selects fields based on the index schema, not on data content. Fields with null values are included in the result set. Use backticks `` `*` ` if the plain `*`` doesn't return all expected fields. +## Example 10: Wildcard exclusion + +Remove fields using wildcard patterns with the minus (-) operator. 
+ +```ppl +source=accounts +| fields - *name +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+ +| account_number | address | balance | gender | city | employer | state | age | email | +|----------------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------| +| 1 | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | +| 6 | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | +| 13 | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | +| 18 | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | ++----------------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+ +``` + +## See Also + +- [table](table.md) - Alias command with identical functionality \ No newline at end of file diff --git a/docs/user/ppl/cmd/fields.rst b/docs/user/ppl/cmd/fields.rst deleted file mode 100644 index 81ccff71b80..00000000000 --- a/docs/user/ppl/cmd/fields.rst +++ /dev/null @@ -1,206 +0,0 @@ -====== -fields -====== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -The ``fields`` command keeps or removes fields from the search result. - -Syntax -====== -fields [+|-] - -* +|-: optional. If the plus (+) is used, only the fields specified in the field list will be kept. If the minus (-) is used, all the fields specified in the field list will be removed. **Default:** +. -* field-list: mandatory. Comma-delimited or space-delimited list of fields to keep or remove. Supports wildcard patterns. - -Example 1: Select specified fields from result -============================================== - -This example shows selecting account_number, firstname and lastname fields from search results. - -PPL query:: - - os> source=accounts | fields account_number, firstname, lastname; - fetched rows / total rows = 4/4 - +----------------+-----------+----------+ - | account_number | firstname | lastname | - |----------------+-----------+----------| - | 1 | Amber | Duke | - | 6 | Hattie | Bond | - | 13 | Nanette | Bates | - | 18 | Dale | Adams | - +----------------+-----------+----------+ - -Example 2: Remove specified fields from result -============================================== - -This example shows removing the account_number field from search results. - -PPL query:: - - os> source=accounts | fields account_number, firstname, lastname | fields - account_number ; - fetched rows / total rows = 4/4 - +-----------+----------+ - | firstname | lastname | - |-----------+----------| - | Amber | Duke | - | Hattie | Bond | - | Nanette | Bates | - | Dale | Adams | - +-----------+----------+ - -Example 3: Space-delimited field selection -========================================== - -Fields can be specified using spaces instead of commas, providing a more concise syntax. 
- -**Syntax**: ``fields field1 field2 field3`` - -PPL query:: - - os> source=accounts | fields firstname lastname age; - fetched rows / total rows = 4/4 - +-----------+----------+-----+ - | firstname | lastname | age | - |-----------+----------+-----| - | Amber | Duke | 32 | - | Hattie | Bond | 36 | - | Nanette | Bates | 28 | - | Dale | Adams | 33 | - +-----------+----------+-----+ - -Example 4: Prefix wildcard pattern -================================== - -Select fields starting with a pattern using prefix wildcards. - -PPL query:: - - os> source=accounts | fields account*; - fetched rows / total rows = 4/4 - +----------------+ - | account_number | - |----------------| - | 1 | - | 6 | - | 13 | - | 18 | - +----------------+ - -Example 5: Suffix wildcard pattern -================================== - -Select fields ending with a pattern using suffix wildcards. - -PPL query:: - - os> source=accounts | fields *name; - fetched rows / total rows = 4/4 - +-----------+----------+ - | firstname | lastname | - |-----------+----------| - | Amber | Duke | - | Hattie | Bond | - | Nanette | Bates | - | Dale | Adams | - +-----------+----------+ - -Example 6: Contains wildcard pattern -==================================== - -Select fields containing a pattern using contains wildcards. - -PPL query:: - - os> source=accounts | fields *a* | head 1; - fetched rows / total rows = 1/1 - +----------------+-----------+-----------------+---------+-------+-----+----------------------+----------+ - | account_number | firstname | address | balance | state | age | email | lastname | - |----------------+-----------+-----------------+---------+-------+-----+----------------------+----------| - | 1 | Amber | 880 Holmes Lane | 39225 | IL | 32 | amberduke@pyrami.com | Duke | - +----------------+-----------+-----------------+---------+-------+-----+----------------------+----------+ - -Example 7: Mixed delimiter syntax -================================= - -Combine spaces and commas for flexible field specification. - -PPL query:: - - os> source=accounts | fields firstname, account* *name; - fetched rows / total rows = 4/4 - +-----------+----------------+----------+ - | firstname | account_number | lastname | - |-----------+----------------+----------| - | Amber | 1 | Duke | - | Hattie | 6 | Bond | - | Nanette | 13 | Bates | - | Dale | 18 | Adams | - +-----------+----------------+----------+ - -Example 8: Field deduplication -============================== - -Automatically prevents duplicate columns when wildcards expand to already specified fields. - -PPL query:: - - os> source=accounts | fields firstname, *name; - fetched rows / total rows = 4/4 - +-----------+----------+ - | firstname | lastname | - |-----------+----------| - | Amber | Duke | - | Hattie | Bond | - | Nanette | Bates | - | Dale | Adams | - +-----------+----------+ - -Note: Even though ``firstname`` is explicitly specified and would also match ``*name``, it appears only once due to automatic deduplication. - -Example 9: Full wildcard selection -================================== - -Select all available fields using ``*`` or ```*```. This selects all fields defined in the index schema, including fields that may contain null values. 
-
-PPL query::
-
-    os> source=accounts | fields `*` | head 1;
-    fetched rows / total rows = 1/1
-    +----------------+-----------+-----------------+---------+--------+--------+----------+-------+-----+----------------------+----------+
-    | account_number | firstname | address         | balance | gender | city   | employer | state | age | email                | lastname |
-    |----------------+-----------+-----------------+---------+--------+--------+----------+-------+-----+----------------------+----------|
-    | 1              | Amber     | 880 Holmes Lane | 39225   | M      | Brogan | Pyrami   | IL    | 32  | amberduke@pyrami.com | Duke     |
-    +----------------+-----------+-----------------+---------+--------+--------+----------+-------+-----+----------------------+----------+
-
-Note: The ``*`` wildcard selects fields based on the index schema, not on data content. Fields with null values are included in the result set. Use backticks ```*``` if the plain ``*`` doesn't return all expected fields.
-
-Example 10: Wildcard exclusion
-==============================
-
-Remove fields using wildcard patterns with the minus (-) operator.
-
-PPL query::
-
-    os> source=accounts | fields - *name;
-    fetched rows / total rows = 4/4
-    +----------------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+
-    | account_number | address              | balance | gender | city   | employer | state | age | email                 |
-    |----------------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------|
-    | 1              | 880 Holmes Lane      | 39225   | M      | Brogan | Pyrami   | IL    | 32  | amberduke@pyrami.com  |
-    | 6              | 671 Bristol Street   | 5686    | M      | Dante  | Netagy   | TN    | 36  | hattiebond@netagy.com |
-    | 13             | 789 Madison Street   | 32838   | F      | Nogal  | Quility  | VA    | 28  | null                  |
-    | 18             | 467 Hutchinson Court | 4180    | M      | Orick  | null     | MD    | 33  | daleadams@boink.com   |
-    +----------------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+
-
-
-See Also
-========
-- `table <table.rst>`_ - Alias command with identical functionality
diff --git a/docs/user/ppl/cmd/fillnull.md b/docs/user/ppl/cmd/fillnull.md
new file mode 100644
index 00000000000..40ed91e8653
--- /dev/null
+++ b/docs/user/ppl/cmd/fillnull.md
@@ -0,0 +1,176 @@
# fillnull

## Description

The `fillnull` command fills null values with the provided value in one or more fields in the search result.

## Syntax

fillnull with <replacement> [in <field-list>]
fillnull using <field> = <replacement> [, <field> = <replacement>]
fillnull value=<replacement> [<field-list>]

* replacement: mandatory. The value used to replace null values.
* field-list: optional. List of fields to apply the replacement to. It can be comma-delimited (with `with` or `using` syntax) or space-delimited (with `value=` syntax). **Default:** all fields.
* field: mandatory when using the `using` syntax. Individual field name to assign a specific replacement value.
* **Syntax variations**
  * `with <replacement> in <field-list>` - Apply the same value to the specified fields
  * `using <field> = <replacement>, ...` - Apply different values to different fields
  * `value=<replacement> [<field-list>]` - Alternative syntax with an optional space-delimited field list

## Example 1: Replace null values with a specified value on one field

This example shows replacing null values in the email field with the empty string ''.
+ +```ppl +source=accounts +| fields email, employer +| fillnull with '' in email +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+----------+ +| email | employer | +|-----------------------+----------| +| amberduke@pyrami.com | Pyrami | +| hattiebond@netagy.com | Netagy | +| | Quility | +| daleadams@boink.com | null | ++-----------------------+----------+ +``` + +## Example 2: Replace null values with a specified value on multiple fields + +This example shows replacing null values in both email and employer fields with the same replacement value '\'. + +```ppl +source=accounts +| fields email, employer +| fillnull with '' in email, employer +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+-------------+ +| email | employer | +|-----------------------+-------------| +| amberduke@pyrami.com | Pyrami | +| hattiebond@netagy.com | Netagy | +| | Quility | +| daleadams@boink.com | | ++-----------------------+-------------+ +``` + +## Example 3: Replace null values with a specified value on all fields + +This example shows replacing null values in all fields when no field list is specified. + +```ppl +source=accounts +| fields email, employer +| fillnull with '' +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+-------------+ +| email | employer | +|-----------------------+-------------| +| amberduke@pyrami.com | Pyrami | +| hattiebond@netagy.com | Netagy | +| | Quility | +| daleadams@boink.com | | ++-----------------------+-------------+ +``` + +## Example 4: Replace null values with multiple specified values on multiple fields + +This example shows using different replacement values for different fields using the 'using' syntax. + +```ppl +source=accounts +| fields email, employer +| fillnull using email = '', employer = '' +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+---------------+ +| email | employer | +|-----------------------+---------------| +| amberduke@pyrami.com | Pyrami | +| hattiebond@netagy.com | Netagy | +| | Quility | +| daleadams@boink.com | | ++-----------------------+---------------+ +``` + +## Example 5: Replace null with specified value on specific fields (value= syntax) + +This example shows using the alternative 'value=' syntax to replace null values in specific fields. + +```ppl +source=accounts +| fields email, employer +| fillnull value="" email employer +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+-------------+ +| email | employer | +|-----------------------+-------------| +| amberduke@pyrami.com | Pyrami | +| hattiebond@netagy.com | Netagy | +| | Quility | +| daleadams@boink.com | | ++-----------------------+-------------+ +``` + +## Example 6: Replace null with specified value on all fields (value= syntax) + +When no field list is specified, the replacement applies to all fields in the result. + +```ppl +source=accounts +| fields email, employer +| fillnull value='' +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+-------------+ +| email | employer | +|-----------------------+-------------| +| amberduke@pyrami.com | Pyrami | +| hattiebond@netagy.com | Netagy | +| | Quility | +| daleadams@boink.com | | ++-----------------------+-------------+ +``` + +## Limitations + +* The `fillnull` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. 
+* When applying the same value to all fields without specifying field names, all fields must be the same type. For mixed types, use separate fillnull commands or explicitly specify fields. +* The replacement value type must match ALL field types in the field list. When applying the same value to multiple fields, all fields must be the same type (all strings or all numeric). + + **Example:** + +```sql ignore + # This FAILS - same value for mixed-type fields + source=accounts | fillnull value=0 firstname, age + # ERROR: fillnull failed: replacement value type INTEGER is not compatible with field 'firstname' (type: VARCHAR). The replacement value type must match the field type. +``` + \ No newline at end of file diff --git a/docs/user/ppl/cmd/fillnull.rst b/docs/user/ppl/cmd/fillnull.rst deleted file mode 100644 index 7ebceee019a..00000000000 --- a/docs/user/ppl/cmd/fillnull.rst +++ /dev/null @@ -1,156 +0,0 @@ -======== -fillnull -======== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``fillnull`` command fills null values with the provided value in one or more fields in the search result. - - -Syntax -====== - -| fillnull with [in ] -| fillnull using = [, = ] -| fillnull value= [] - -* replacement: mandatory. The value used to replace null values. -* field-list: optional. List of fields to apply the replacement to. Can be comma-delimited (with ``with`` or ``using`` syntax) or space-delimited (with ``value=`` syntax). **Default:** all fields. -* field: mandatory when using ``using`` syntax. Individual field name to assign a specific replacement value. - -* **Syntax variations** - - * ``with in `` - Apply same value to specified fields - * ``using =, ...`` - Apply different values to different fields - * ``value= []`` - Alternative syntax with optional space-delimited field list - -Example 1: Replace null values with a specified value on one field -================================================================== - -This example shows replacing null values in the email field with ''. - -PPL query:: - - os> source=accounts | fields email, employer | fillnull with '' in email; - fetched rows / total rows = 4/4 - +-----------------------+----------+ - | email | employer | - |-----------------------+----------| - | amberduke@pyrami.com | Pyrami | - | hattiebond@netagy.com | Netagy | - | | Quility | - | daleadams@boink.com | null | - +-----------------------+----------+ - -Example 2: Replace null values with a specified value on multiple fields -======================================================================== - -This example shows replacing null values in both email and employer fields with the same replacement value ''. - -PPL query:: - - os> source=accounts | fields email, employer | fillnull with '' in email, employer; - fetched rows / total rows = 4/4 - +-----------------------+-------------+ - | email | employer | - |-----------------------+-------------| - | amberduke@pyrami.com | Pyrami | - | hattiebond@netagy.com | Netagy | - | | Quility | - | daleadams@boink.com | | - +-----------------------+-------------+ - -Example 3: Replace null values with a specified value on all fields -=================================================================== - -This example shows replacing null values in all fields when no field list is specified. 
- -PPL query:: - - PPL> source=accounts | fields email, employer | fillnull with ''; - fetched rows / total rows = 4/4 - +-----------------------+-------------+ - | email | employer | - |-----------------------+-------------| - | amberduke@pyrami.com | Pyrami | - | hattiebond@netagy.com | Netagy | - | | Quility | - | daleadams@boink.com | | - +-----------------------+-------------+ - -Example 4: Replace null values with multiple specified values on multiple fields -================================================================================ - -This example shows using different replacement values for different fields using the 'using' syntax. - -PPL query:: - - os> source=accounts | fields email, employer | fillnull using email = '', employer = ''; - fetched rows / total rows = 4/4 - +-----------------------+---------------+ - | email | employer | - |-----------------------+---------------| - | amberduke@pyrami.com | Pyrami | - | hattiebond@netagy.com | Netagy | - | | Quility | - | daleadams@boink.com | | - +-----------------------+---------------+ - - -Example 5: Replace null with specified value on specific fields (value= syntax) -=============================================================================== - -This example shows using the alternative 'value=' syntax to replace null values in specific fields. - -PPL query:: - - os> source=accounts | fields email, employer | fillnull value="" email employer; - fetched rows / total rows = 4/4 - +-----------------------+-------------+ - | email | employer | - |-----------------------+-------------| - | amberduke@pyrami.com | Pyrami | - | hattiebond@netagy.com | Netagy | - | | Quility | - | daleadams@boink.com | | - +-----------------------+-------------+ - -Example 6: Replace null with specified value on all fields (value= syntax) -========================================================================== - -When no field list is specified, the replacement applies to all fields in the result. - -PPL query:: - - os> source=accounts | fields email, employer | fillnull value=''; - fetched rows / total rows = 4/4 - +-----------------------+-------------+ - | email | employer | - |-----------------------+-------------| - | amberduke@pyrami.com | Pyrami | - | hattiebond@netagy.com | Netagy | - | | Quility | - | daleadams@boink.com | | - +-----------------------+-------------+ - -Limitations -=========== -* The ``fillnull`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. -* When applying the same value to all fields without specifying field names, all fields must be the same type. For mixed types, use separate fillnull commands or explicitly specify fields. -* The replacement value type must match ALL field types in the field list. When applying the same value to multiple fields, all fields must be the same type (all strings or all numeric). - - **Example:** - - .. code-block:: sql - - # This FAILS - same value for mixed-type fields - source=accounts | fillnull value=0 firstname, age - # ERROR: fillnull failed: replacement value type INTEGER is not compatible with field 'firstname' (type: VARCHAR). The replacement value type must match the field type. - diff --git a/docs/user/ppl/cmd/flatten.md b/docs/user/ppl/cmd/flatten.md new file mode 100644 index 00000000000..ba4f9077dcb --- /dev/null +++ b/docs/user/ppl/cmd/flatten.md @@ -0,0 +1,93 @@ +# flatten + +## Description + +The `flatten` command flattens a struct or an object field into separate fields in a document. 
The flattened fields will be ordered **lexicographically** by their original key names in the struct. For example, if the struct has keys `b`, `c` and `Z`, the flattened fields will be ordered as `Z`, `b`, `c`.

Note that `flatten` should not be applied to arrays. Use the `expand` command to expand an array field into multiple rows instead. However, since an array can be stored in a non-array field in OpenSearch, when flattening a field storing a nested array, only the first element of the array will be flattened.

## Syntax

flatten <field> [as (<alias-list>)]

* field: mandatory. The field to be flattened. Only object and nested fields are supported.
* alias-list: optional. The names to use instead of the original key names. Names are separated by commas. It is advised to put the alias-list in parentheses if there is more than one alias. The length must match the number of keys in the struct field. The provided alias names **must** follow the lexicographical order of the corresponding original keys in the struct.

## Example: flatten an object field with aliases

This example shows flattening a message object field and using aliases to rename the flattened fields.

Given the following index `my-index`

```text
{"message":{"info":"a","author":"e","dayOfWeek":1},"myNum":1}
{"message":{"info":"b","author":"f","dayOfWeek":2},"myNum":2}
```

with the following mapping:

```json
{
  "mappings": {
    "properties": {
      "message": {
        "type": "object",
        "properties": {
          "info": {
            "type": "keyword",
            "index": "true"
          },
          "author": {
            "type": "keyword",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            },
            "index": "true"
          },
          "dayOfWeek": {
            "type": "long"
          }
        }
      },
      "myNum": {
        "type": "long"
      }
    }
  }
}
```

The following query flattens the `message` field and renames the keys to
`creator, dow, info`:

```ppl
source=my-index
| flatten message as (creator, dow, info)
```

Expected output:

```text
fetched rows / total rows = 2/2
+-----------------------------------------+--------+---------+-----+------+
| message                                 | myNum  | creator | dow | info |
|-----------------------------------------|--------|---------|-----|------|
| {"info":"a","author":"e","dayOfWeek":1} | 1      | e       | 1   | a    |
| {"info":"b","author":"f","dayOfWeek":2} | 2      | f       | 2   | b    |
+-----------------------------------------+--------+---------+-----+------+
```

## Limitations

* The `flatten` command may not work as expected when the flattened fields are invisible. For example, in the query `source=my-index | fields message | flatten message`, the `flatten message` command does not work because flattened fields such as `message.info` and `message.author` are invisible after the `fields message` command. As an alternative, flatten first and then select fields, as sketched below.
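A minimal illustration of this limitation, assuming the same `my-index` from the example above (marked `ignore` since the first pipeline intentionally demonstrates the failing case):

```ppl ignore
# May not work as expected: `fields message` hides the flattened sub-fields
source=my-index | fields message | flatten message

# Works: flatten the struct first, then select the fields you need
source=my-index | flatten message | fields myNum, info
```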
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/flatten.rst b/docs/user/ppl/cmd/flatten.rst
deleted file mode 100644
index e366fe32daa..00000000000
--- a/docs/user/ppl/cmd/flatten.rst
+++ /dev/null
@@ -1,101 +0,0 @@
-=======
-flatten
-=======
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-Description
-===========
-| The ``flatten`` command flattens a struct or an object field into separate fields in a document.
-
-| The flattened fields will be ordered **lexicographically** by their original key names in the struct. For example, if the struct has keys ``b``, ``c`` and ``Z``, the flattened fields will be ordered as ``Z``, ``b``, ``c``.
-
-| Note that ``flatten`` should not be applied to arrays. Use the ``expand`` command to expand an array field into multiple rows instead. However, since an array can be stored in a non-array field in OpenSearch, when flattening a field storing a nested array, only the first element of the array will be flattened.
-
-Syntax
-======
-
-flatten <field> [as (<alias-list>)]
-
-* field: mandatory. The field to be flattened. Only object and nested fields are supported.
-* alias-list: optional. The names to use instead of the original key names. Names are separated by commas. It is advised to put the alias-list in parentheses if there is more than one alias. The length must match the number of keys in the struct field. The provided alias names **must** follow the lexicographical order of the corresponding original keys in the struct.
-
-Example: flatten an object field with aliases
-=============================================
-
-This example shows flattening a message object field and using aliases to rename the flattened fields.
-
-Given the following index ``my-index``
-
-.. code-block::
-
-    {"message":{"info":"a","author":"e","dayOfWeek":1},"myNum":1}
-    {"message":{"info":"b","author":"f","dayOfWeek":2},"myNum":2}
-
-with the following mapping:
-
-.. code-block:: json
-
-    {
-      "mappings": {
-        "properties": {
-          "message": {
-            "type": "object",
-            "properties": {
-              "info": {
-                "type": "keyword",
-                "index": "true"
-              },
-              "author": {
-                "type": "keyword",
-                "fields": {
-                  "keyword": {
-                    "type": "keyword",
-                    "ignore_above": 256
-                  }
-                },
-                "index": "true"
-              },
-              "dayOfWeek": {
-                "type": "long"
-              }
-            }
-          },
-          "myNum": {
-            "type": "long"
-          }
-        }
-      }
-    }
-
-
-The following query flattens the ``message`` field and renames the keys to
-``creator, dow, info``:
-
-PPL query::
-
-    PPL> source=my-index | flatten message as (creator, dow, info);
-    fetched rows / total rows = 2/2
-    +-----------------------------------------+--------+---------+-----+------+
-    | message                                 | myNum  | creator | dow | info |
-    |-----------------------------------------|--------|---------|-----|------|
-    | {"info":"a","author":"e","dayOfWeek":1} | 1      | e       | 1   | a    |
-    | {"info":"b","author":"f","dayOfWeek":2} | 2      | f       | 2   | b    |
-    +-----------------------------------------+--------+---------+-----+------+
-
-Limitations
-===========
-* ``flatten`` command may not work as expected when its flattened fields are
-  invisible.
-
-  For example in query
-  ``source=my-index | fields message | flatten message``, the
-  ``flatten message`` command doesn't work since some flattened fields such as
-  ``message.info`` and ``message.author`` after command ``fields message`` are
-  invisible.
-
-  As an alternative, you can change to ``source=my-index | flatten message``.
diff --git a/docs/user/ppl/cmd/grok.md b/docs/user/ppl/cmd/grok.md
new file mode 100644
index 00000000000..c2636b5358b
--- /dev/null
+++ b/docs/user/ppl/cmd/grok.md
@@ -0,0 +1,86 @@
# grok

## Description

The `grok` command parses a text field with a grok pattern and appends the results to the search result.

## Syntax

grok <field> <pattern>

* field: mandatory. The field must be a text field.
* pattern: mandatory. The grok pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field.

## Example 1: Create the new field

This example shows how to create a new field `host` for each document. `host` will be the host name after `@` in the `email` field. Parsing a null field will return an empty string.
+ +```ppl +source=accounts +| grok email '.+@%{HOSTNAME:host}' +| fields email, host +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+------------+ +| email | host | +|-----------------------+------------| +| amberduke@pyrami.com | pyrami.com | +| hattiebond@netagy.com | netagy.com | +| null | | +| daleadams@boink.com | boink.com | ++-----------------------+------------+ +``` + +## Example 2: Override the existing field + +This example shows how to override the existing `address` field with street number removed. + +```ppl +source=accounts +| grok address '%{NUMBER} %{GREEDYDATA:address}' +| fields address +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++------------------+ +| address | +|------------------| +| Holmes Lane | +| Bristol Street | +| Madison Street | +| Hutchinson Court | ++------------------+ +``` + +## Example 3: Using grok to parse logs + +This example shows how to use grok to parse raw logs. + +```ppl +source=apache +| grok message '%{COMMONAPACHELOG}' +| fields COMMONAPACHELOG, timestamp, response, bytes +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------+ +| COMMONAPACHELOG | timestamp | response | bytes | +|-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------| +| 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | 28/Sep/2022:10:15:57 -0700 | 404 | 19927 | +| 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | 28/Sep/2022:10:15:57 -0700 | 100 | 28722 | +| 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | 28/Sep/2022:10:15:57 -0700 | 401 | 27439 | +| 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | 28/Sep/2022:10:15:57 -0700 | 301 | 9481 | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------+ +``` + +## Limitations + +The grok command has the same limitations as the parse command, see [parse limitations](./parse.md#Limitations) for details. \ No newline at end of file diff --git a/docs/user/ppl/cmd/grok.rst b/docs/user/ppl/cmd/grok.rst deleted file mode 100644 index 836d01b6a89..00000000000 --- a/docs/user/ppl/cmd/grok.rst +++ /dev/null @@ -1,81 +0,0 @@ -==== -grok -==== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -The ``grok`` command parses a text field with a grok pattern and appends the results to the search result. - -Syntax -====== -grok - -* field: mandatory. The field must be a text field. -* pattern: mandatory. The grok pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field. - -Example 1: Create the new field -=============================== - -This example shows how to create new field ``host`` for each document. ``host`` will be the host name after ``@`` in ``email`` field. Parsing a null field will return an empty string. 
- -PPL query:: - - os> source=accounts | grok email '.+@%{HOSTNAME:host}' | fields email, host ; - fetched rows / total rows = 4/4 - +-----------------------+------------+ - | email | host | - |-----------------------+------------| - | amberduke@pyrami.com | pyrami.com | - | hattiebond@netagy.com | netagy.com | - | null | | - | daleadams@boink.com | boink.com | - +-----------------------+------------+ - - -Example 2: Override the existing field -====================================== - -This example shows how to override the existing ``address`` field with street number removed. - -PPL query:: - - os> source=accounts | grok address '%{NUMBER} %{GREEDYDATA:address}' | fields address ; - fetched rows / total rows = 4/4 - +------------------+ - | address | - |------------------| - | Holmes Lane | - | Bristol Street | - | Madison Street | - | Hutchinson Court | - +------------------+ - -Example 3: Using grok to parse logs -=================================== - -This example shows how to use grok to parse raw logs. - -PPL query:: - - os> source=apache | grok message '%{COMMONAPACHELOG}' | fields COMMONAPACHELOG, timestamp, response, bytes ; - fetched rows / total rows = 4/4 - +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------+ - | COMMONAPACHELOG | timestamp | response | bytes | - |-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------| - | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | 28/Sep/2022:10:15:57 -0700 | 404 | 19927 | - | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | 28/Sep/2022:10:15:57 -0700 | 100 | 28722 | - | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | 28/Sep/2022:10:15:57 -0700 | 401 | 27439 | - | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | 28/Sep/2022:10:15:57 -0700 | 301 | 9481 | - +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------+ - -Limitations -=========== - -The grok command has the same limitations as the parse command, see `parse limitations <./parse.rst#Limitations>`_ for details. diff --git a/docs/user/ppl/cmd/head.md b/docs/user/ppl/cmd/head.md new file mode 100644 index 00000000000..5565c90d782 --- /dev/null +++ b/docs/user/ppl/cmd/head.md @@ -0,0 +1,84 @@ +# head + +## Description + +The `head` command returns the first N number of specified results after an optional offset in search order. +## Syntax + +head [\] [from \] +* size: optional integer. Number of results to return. **Default:** 10 +* offset: optional integer after `from`. Number of results to skip. **Default:** 0 + +## Example 1: Get first 10 results + +This example shows getting a maximum of 10 results from accounts index. + +```ppl +source=accounts +| fields firstname, age +| head +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+ +| firstname | age | +|-----------+-----| +| Amber | 32 | +| Hattie | 36 | +| Nanette | 28 | +| Dale | 33 | ++-----------+-----+ +``` + +## Example 2: Get first N results + +This example shows getting the first 3 results from accounts index. 
```ppl
source=accounts
| fields firstname, age
| head 3
```

Expected output:

```text
fetched rows / total rows = 3/3
+-----------+-----+
| firstname | age |
|-----------+-----|
| Amber     | 32  |
| Hattie    | 36  |
| Nanette   | 28  |
+-----------+-----+
```

## Example 3: Get first N results after offset M

This example shows getting the first 3 results after offset 1 from accounts index.

```ppl
source=accounts
| fields firstname, age
| head 3 from 1
```

Expected output:

```text
fetched rows / total rows = 3/3
+-----------+-----+
| firstname | age |
|-----------+-----|
| Hattie    | 36  |
| Nanette   | 28  |
| Dale      | 33  |
+-----------+-----+
```

## Limitations

The `head` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/head.rst b/docs/user/ppl/cmd/head.rst
deleted file mode 100644
index a17f283026d..00000000000
--- a/docs/user/ppl/cmd/head.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-====
-head
-====
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-===========
-The ``head`` command returns the first N number of specified results after an optional offset in search order.
-
-Syntax
-======
-head [<size>] [from <offset>]
-
-* size: optional integer. Number of results to return. **Default:** 10
-* offset: optional integer after ``from``. Number of results to skip. **Default:** 0
-
-Example 1: Get first 10 results
-===============================
-
-This example shows getting a maximum of 10 results from accounts index.
-
-PPL query::
-
-    os> source=accounts | fields firstname, age | head;
-    fetched rows / total rows = 4/4
-    +-----------+-----+
-    | firstname | age |
-    |-----------+-----|
-    | Amber     | 32  |
-    | Hattie    | 36  |
-    | Nanette   | 28  |
-    | Dale      | 33  |
-    +-----------+-----+
-
-Example 2: Get first N results
-==============================
-
-This example shows getting the first 3 results from accounts index.
-
-PPL query::
-
-    os> source=accounts | fields firstname, age | head 3;
-    fetched rows / total rows = 3/3
-    +-----------+-----+
-    | firstname | age |
-    |-----------+-----|
-    | Amber     | 32  |
-    | Hattie    | 36  |
-    | Nanette   | 28  |
-    +-----------+-----+
-
-Example 3: Get first N results after offset M
-=============================================
-
-This example shows getting the first 3 results after offset 1 from accounts index.
-
-PPL query::
-
-    os> source=accounts | fields firstname, age | head 3 from 1;
-    fetched rows / total rows = 3/3
-    +-----------+-----+
-    | firstname | age |
-    |-----------+-----|
-    | Hattie    | 36  |
-    | Nanette   | 28  |
-    | Dale      | 33  |
-    +-----------+-----+
-
-Limitations
-===========
-The ``head`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.
diff --git a/docs/user/ppl/cmd/join.md b/docs/user/ppl/cmd/join.md
new file mode 100644
index 00000000000..39d3f5a24d2
--- /dev/null
+++ b/docs/user/ppl/cmd/join.md
@@ -0,0 +1,214 @@
# join

## Description

The `join` command combines two datasets together. The left side can be an index or the results of a piped command; the right side can be either an index or a subsearch.

## Syntax

### Basic syntax:

[joinType] join [leftAlias] [rightAlias] (on | where) <joinCriteria> <right-dataset>

* joinType: optional. The type of join to perform. Options: `left`, `semi`, `anti`, and the performance-sensitive types `right`, `full`, `cross`. **Default:** `inner`.
* leftAlias: optional. The subsearch alias to use with the left join side, to avoid ambiguous naming. Pattern: `left = <leftAlias>`
* rightAlias: optional. The subsearch alias to use with the right join side, to avoid ambiguous naming. Pattern: `right = <rightAlias>`
* joinCriteria: mandatory. Any comparison expression. Must follow the `on` or `where` keyword.
* right-dataset: mandatory. The right dataset can be either an `index` or a `subsearch`, with or without an alias.

### Extended syntax:

join [type=<joinType>] [overwrite=<bool>] [max=n] (<join-field-list> | [leftAlias] [rightAlias] (on | where) <joinCriteria>) <right-dataset>

* type: optional. Join type using extended syntax. Options: `left`, `outer` (alias of `left`), `semi`, `anti`, and the performance-sensitive types `right`, `full`, `cross`. **Default:** `inner`.
* overwrite: optional boolean. Only works with `join-field-list`. Specifies whether duplicate-named fields from the right-dataset should replace the corresponding fields in the main search results (see the sketch after this list). **Default:** `true`.
* max: optional integer. Controls how many subsearch results can be joined against each row in the main search. **Default:** 0 (unlimited).
* join-field-list: optional. The fields used to build the join criteria. The join fields must exist on both sides. If not specified, all fields common to both sides will be used as join keys.
* leftAlias: optional. Same as basic syntax when used with extended syntax.
* rightAlias: optional. Same as basic syntax when used with extended syntax.
* joinCriteria: mandatory. Same as basic syntax when used with extended syntax.
* right-dataset: mandatory. Same as basic syntax.
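A sketch of the `overwrite` semantics, assuming hypothetical tables where both `table1` and `table2` contain fields `name` and `b`:

```ppl ignore
# overwrite=true (default): b from table2 replaces b from table1 in the output
source = table1 | join type=left overwrite=true name table2

# overwrite=false: the original b from table1 is kept
source = table1 | join type=left overwrite=false name table2
```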
## Configuration

### plugins.ppl.join.subsearch_maxout

This setting configures the maximum number of rows from the subsearch to join against. The default value is `50000`. A value of `0` indicates that the restriction is unlimited.
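Before changing it, the current value can be inspected through the standard cluster settings API (a sketch; the `filter_path` expression is an assumption about where the setting surfaces in the response):

```bash ignore
curl -sS -X GET 'localhost:9200/_cluster/settings?include_defaults=true&filter_path=**.plugins.ppl.join.subsearch_maxout'
```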
+Change the join.subsearch_maxout to 5000 + +```bash ignore +curl -sS -H 'Content-Type: application/json' \ +-X PUT localhost:9200/_plugins/_query/settings \ +-d '{"persistent" : {"plugins.ppl.join.subsearch_maxout" : "5000"}}' +``` + +```json +{ + "acknowledged": true, + "persistent": { + "plugins": { + "ppl": { + "join": { + "subsearch_maxout": "5000" + } + } + } + }, + "transient": {} +} +``` + +## Usage + +Basic join syntax: + +``` +source = table1 | inner join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | inner join left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | left join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | right join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | full left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | cross join left = l right = r on 1=1 table2 +source = table1 | left semi join left = l right = r on l.a = r.a table2 +source = table1 | left anti join left = l right = r on l.a = r.a table2 +source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] +source = table1 | inner join on table1.a = table2.a table2 | fields table1.a, table2.a, table1.b, table1.c +source = table1 | inner join on a = c table2 | fields a, b, c, d +source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields l.a, r.a +source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields t1.a, t2.a +source = table1 | join left = l right = r on l.a = r.a [ source = table2 ] as s | fields l.a, s.a +``` + +Extended syntax with options: + +``` +source = table1 | join type=outer left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | join type=left left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | join type=inner max=1 left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c +source = table1 | join a table2 | fields a, b, c +source = table1 | join a, b table2 | fields a, b, c +source = table1 | join type=outer a b table2 | fields a, b, c +source = table1 | join type=inner max=1 a, b table2 | fields a, b, c +source = table1 | join type=left overwrite=false max=0 a, b [source=table2 | rename d as b] | fields a, b, c +``` + +## Example 1: Two indices join + +This example shows joining two indices using the basic join syntax. + +```ppl +source = state_country +| inner join left=a right=b ON a.name = b.name occupation +| stats avg(salary) by span(age, 10) as age_span, b.country +``` + +Expected output: + +```text +fetched rows / total rows = 5/5 ++-------------+----------+-----------+ +| avg(salary) | age_span | b.country | +|-------------+----------+-----------| +| 120000.0 | 40 | USA | +| 105000.0 | 20 | Canada | +| 0.0 | 40 | Canada | +| 70000.0 | 30 | USA | +| 100000.0 | 70 | England | ++-------------+----------+-----------+ +``` + +## Example 2: Join with subsearch + +This example shows joining with a subsearch using the basic join syntax. 
```ppl
source = state_country as a
| where country = 'USA' OR country = 'England'
| left join ON a.name = b.name [ source = occupation
| where salary > 0
| fields name, country, salary
| sort salary
| head 3 ] as b
| stats avg(salary) by span(age, 10) as age_span, b.country
```

Expected output:

```text
fetched rows / total rows = 3/3
+-------------+----------+-----------+
| avg(salary) | age_span | b.country |
|-------------+----------+-----------|
| null        | 40       | null      |
| 70000.0     | 30       | USA       |
| 100000.0    | 70       | England   |
+-------------+----------+-----------+
```

## Example 3: Join with field list

This example shows joining using the extended syntax with a field list.

```ppl
source = state_country
| where country = 'USA' OR country = 'England'
| join type=left overwrite=true name [ source = occupation
| where salary > 0
| fields name, country, salary
| sort salary
| head 3 ]
| stats avg(salary) by span(age, 10) as age_span, country
```

Expected output:

```text
fetched rows / total rows = 3/3
+-------------+----------+---------+
| avg(salary) | age_span | country |
|-------------+----------+---------|
| null        | 40       | null    |
| 70000.0     | 30       | USA     |
| 100000.0    | 70       | England |
+-------------+----------+---------+
```

## Example 4: Join with options

This example shows joining using the extended syntax with additional options.

```ppl
source = state_country
| join type=inner overwrite=false max=1 name occupation
| stats avg(salary) by span(age, 10) as age_span, country
```

Expected output:

```text
fetched rows / total rows = 4/4
+-------------+----------+---------+
| avg(salary) | age_span | country |
|-------------+----------+---------|
| 120000.0    | 40       | USA     |
| 100000.0    | 70       | USA     |
| 105000.0    | 20       | Canada  |
| 70000.0     | 30       | USA     |
+-------------+----------+---------+
```

## Limitations

With the basic syntax, fields in the left and right outputs may share the same name. For example, with the join criteria `ON t1.id = t2.id`, the name `id` is ambiguous in the output. To avoid this ambiguity, the ambiguous fields are renamed to `<alias>.id`, or to `<tableName>.id` if no alias exists.

Assuming table1 and table2 contain only the field `id`, the following PPL queries produce these outputs:

| Query | Output |
| --- | --- |
| source=table1 \| join left=t1 right=t2 on t1.id=t2.id table2 \| eval a = 1 | t1.id, t2.id, a |
| source=table1 \| join on table1.id=table2.id table2 \| eval a = 1 | table1.id, table2.id, a |
| source=table1 \| join on table1.id=t2.id table2 as t2 \| eval a = 1 | table1.id, t2.id, a |
| source=table1 \| join right=tt on table1.id=t2.id [ source=table2 as t2 \| eval b = id ] \| eval a = 1 | table1.id, tt.id, tt.b, a |

With the extended syntax (join with a field list), duplicate-named fields in the output are deduplicated, and which fields are kept is determined by the value of the `overwrite` option.

Join types `inner`, `left`, `outer` (alias of `left`), `semi` and `anti` are supported by default. `right`, `full` and `cross` are performance-sensitive join types that are disabled by default. Set the config `plugins.calcite.all_join_types.allowed = true` to enable them.
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/join.rst b/docs/user/ppl/cmd/join.rst
deleted file mode 100644
index 61dfc31042d..00000000000
--- a/docs/user/ppl/cmd/join.rst
+++ /dev/null
@@ -1,198 +0,0 @@
-====
-join
-====
-
-.. rubric:: Table of contents
-
-.. 
contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``join`` command combines two datasets together. The left side could be an index or results from a piped commands, the right side could be either an index or a subsearch. - -Syntax -====== - -Basic syntax: -------------- - -[joinType] join [leftAlias] [rightAlias] (on | where) - -* joinType: optional. The type of join to perform. Options: ``left``, ``semi``, ``anti``, and performance sensitive types ``right``, ``full``, ``cross``. **Default:** ``inner``. -* leftAlias: optional. The subsearch alias to use with the left join side, to avoid ambiguous naming. Pattern: ``left = `` -* rightAlias: optional. The subsearch alias to use with the right join side, to avoid ambiguous naming. Pattern: ``right = `` -* joinCriteria: mandatory. Any comparison expression. Must follow ``on`` or ``where`` keyword. -* right-dataset: mandatory. Right dataset could be either an ``index`` or a ``subsearch`` with/without alias. - -Extended syntax: ----------------- - -join [type=] [overwrite=] [max=n] ( | [leftAlias] [rightAlias] (on | where) ) - -* type: optional. Join type using extended syntax. Options: ``left``, ``outer`` (alias of ``left``), ``semi``, ``anti``, and performance sensitive types ``right``, ``full``, ``cross``. **Default:** ``inner``. -* overwrite: optional boolean. Only works with ``join-field-list``. Specifies whether duplicate-named fields from right-dataset should replace corresponding fields in the main search results. **Default:** ``true``. -* max: optional integer. Controls how many subsearch results could be joined against each row in main search. **Default:** 0 (unlimited). -* join-field-list: optional. The fields used to build the join criteria. The join field list must exist on both sides. If not specified, all fields common to both sides will be used as join keys. -* leftAlias: optional. Same as basic syntax when used with extended syntax. -* rightAlias: optional. Same as basic syntax when used with extended syntax. -* joinCriteria: mandatory. Same as basic syntax when used with extended syntax. -* right-dataset: mandatory. Same as basic syntax. - -Configuration -============= - -plugins.ppl.join.subsearch_maxout ---------------------------------- - -The size configures the maximum of rows from subsearch to join against. The default value is: ``50000``. A value of ``0`` indicates that the restriction is unlimited. - -Change the join.subsearch_maxout to 5000:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... 
-d '{"persistent" : {"plugins.ppl.join.subsearch_maxout" : "5000"}}' - { - "acknowledged": true, - "persistent": { - "plugins": { - "ppl": { - "join": { - "subsearch_maxout": "5000" - } - } - } - }, - "transient": {} - } - - -Usage -===== - -Basic join syntax:: - - source = table1 | inner join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | inner join left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | left join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | right join left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | full left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | cross join left = l right = r on 1=1 table2 - source = table1 | left semi join left = l right = r on l.a = r.a table2 - source = table1 | left anti join left = l right = r on l.a = r.a table2 - source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] - source = table1 | inner join on table1.a = table2.a table2 | fields table1.a, table2.a, table1.b, table1.c - source = table1 | inner join on a = c table2 | fields a, b, c, d - source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields l.a, r.a - source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields t1.a, t2.a - source = table1 | join left = l right = r on l.a = r.a [ source = table2 ] as s | fields l.a, s.a - -Extended syntax with options:: - - source = table1 | join type=outer left = l right = r on l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | join type=left left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | join type=inner max=1 left = l right = r where l.a = r.a table2 | fields l.a, r.a, b, c - source = table1 | join a table2 | fields a, b, c - source = table1 | join a, b table2 | fields a, b, c - source = table1 | join type=outer a b table2 | fields a, b, c - source = table1 | join type=inner max=1 a, b table2 | fields a, b, c - source = table1 | join type=left overwrite=false max=0 a, b [source=table2 | rename d as b] | fields a, b, c - -Example 1: Two indices join -=========================== - -This example shows joining two indices using the basic join syntax. - -PPL query:: - - os> source = state_country | inner join left=a right=b ON a.name = b.name occupation | stats avg(salary) by span(age, 10) as age_span, b.country; - fetched rows / total rows = 5/5 - +-------------+----------+-----------+ - | avg(salary) | age_span | b.country | - |-------------+----------+-----------| - | 120000.0 | 40 | USA | - | 105000.0 | 20 | Canada | - | 0.0 | 40 | Canada | - | 70000.0 | 30 | USA | - | 100000.0 | 70 | England | - +-------------+----------+-----------+ - -Example 2: Join with subsearch -============================== - -This example shows joining with a subsearch using the basic join syntax. 
- -PPL query:: - - PPL> source = state_country as a | where country = 'USA' OR country = 'England' | left join ON a.name = b.name [ source = occupation | where salary > 0 | fields name, country, salary | sort salary | head 3 ] as b | stats avg(salary) by span(age, 10) as age_span, b.country; - fetched rows / total rows = 3/3 - +-------------+----------+-----------+ - | avg(salary) | age_span | b.country | - |-------------+----------+-----------| - | null | 40 | null | - | 70000.0 | 30 | USA | - | 100000.0 | 70 | England | - +-------------+----------+-----------+ - -Example 3: Join with field list -=============================== - -This example shows joining using the extended syntax with field list. - -PPL query:: - - PPL> source = state_country | where country = 'USA' OR country = 'England' | join type=left overwrite=true name [ source = occupation | where salary > 0 | fields name, country, salary | sort salary | head 3 ] | stats avg(salary) by span(age, 10) as age_span, country; - fetched rows / total rows = 3/3 - +-------------+----------+---------+ - | avg(salary) | age_span | country | - |-------------+----------+---------| - | null | 40 | null | - | 70000.0 | 30 | USA | - | 100000.0 | 70 | England | - +-------------+----------+---------+ - -Example 4: Join with options -============================ - -This example shows joining using the extended syntax with additional options. - -PPL query:: - - os> source = state_country | join type=inner overwrite=false max=1 name occupation | stats avg(salary) by span(age, 10) as age_span, country; - fetched rows / total rows = 4/4 - +-------------+----------+---------+ - | avg(salary) | age_span | country | - |-------------+----------+---------| - | 120000.0 | 40 | USA | - | 100000.0 | 70 | USA | - | 105000.0 | 20 | Canada | - | 70000.0 | 30 | USA | - +-------------+----------+---------+ - -Limitations -=========== -For basic syntax, if fields in the left outputs and right outputs have the same name. Typically, in the join criteria -``ON t1.id = t2.id``, the names ``id`` in output are ambiguous. To avoid ambiguous, the ambiguous -fields in output rename to ``.id``, or else ``.id`` if no alias existing. - -Assume table1 and table2 only contain field ``id``, following PPL queries and their outputs are: - -.. list-table:: - :widths: 75 25 - :header-rows: 1 - - * - Query - - Output - * - source=table1 | join left=t1 right=t2 on t1.id=t2.id table2 | eval a = 1 - - t1.id, t2.id, a - * - source=table1 | join on table1.id=table2.id table2 | eval a = 1 - - table1.id, table2.id, a - * - source=table1 | join on table1.id=t2.id table2 as t2 | eval a = 1 - - table1.id, t2.id, a - * - source=table1 | join right=tt on table1.id=t2.id [ source=table2 as t2 | eval b = id ] | eval a = 1 - - table1.id, tt.id, tt.b, a - -| For extended syntax (join with field list), when duplicate-named fields in output results are deduplicated, the fields in output determined by the value of 'overwrite' option. -| Join types ``inner``, ``left``, ``outer`` (alias of ``left``), ``semi`` and ``anti`` are supported by default. ``right``, ``full``, ``cross`` are performance sensitive join types which are disabled by default. Set config ``plugins.calcite.all_join_types.allowed = true`` to enable. 
diff --git a/docs/user/ppl/cmd/kmeans.md b/docs/user/ppl/cmd/kmeans.md new file mode 100644 index 00000000000..247902804df --- /dev/null +++ b/docs/user/ppl/cmd/kmeans.md @@ -0,0 +1,37 @@ +# kmeans (deprecated by ml command)

## Description

The `kmeans` command applies the kmeans algorithm from the ml-commons plugin to the search result returned by a PPL command.

## Syntax

kmeans [centroids=\<int\>] [iterations=\<int\>] [distance_type=\<string\>]
* centroids: optional. The number of clusters you want to group your data points into. **Default:** 2.
* iterations: optional. Number of iterations. **Default:** 10.
* distance_type: optional. The distance type can be COSINE, L1, or EUCLIDEAN. **Default:** EUCLIDEAN.

## Example: Clustering of Iris Dataset

This example shows how to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals.

```ppl
source=iris_data
| fields sepal_length_in_cm, sepal_width_in_cm, petal_length_in_cm, petal_width_in_cm
| kmeans centroids=3
```

Expected output:

```text
+--------------------+-------------------+--------------------+-------------------+-----------+
| sepal_length_in_cm | sepal_width_in_cm | petal_length_in_cm | petal_width_in_cm | ClusterID |
|--------------------+-------------------+--------------------+-------------------+-----------|
| 5.1                | 3.5               | 1.4                | 0.2               | 1         |
| 5.6                | 3.0               | 4.1                | 1.3               | 0         |
| 6.7                | 2.5               | 5.8                | 1.8               | 2         |
+--------------------+-------------------+--------------------+-------------------+-----------+
```

## Limitations

The `kmeans` command can only work with `plugins.calcite.enabled=false`. \ No newline at end of file diff --git a/docs/user/ppl/cmd/kmeans.rst b/docs/user/ppl/cmd/kmeans.rst deleted file mode 100644 index ca4ba255c7e..00000000000 --- a/docs/user/ppl/cmd/kmeans.rst +++ /dev/null @@ -1,44 +0,0 @@ -================================= -kmeans (deprecated by ml command) -================================= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``kmeans`` command applies the kmeans algorithm in the ml-commons plugin on the search result returned by a PPL command. - -Syntax -====== -kmeans - -* centroids: optional. The number of clusters you want to group your data points into. **Default:** 2. -* iterations: optional. Number of iterations. **Default:** 10. -* distance_type: optional. The distance type can be COSINE, L1, or EUCLIDEAN. **Default:** EUCLIDEAN. - - -Example: Clustering of Iris Dataset -=================================== - -This example shows how to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals.
- -PPL query:: - - > source=iris_data | fields sepal_length_in_cm, sepal_width_in_cm, petal_length_in_cm, petal_width_in_cm | kmeans centroids=3 - +--------------------+-------------------+--------------------+-------------------+-----------+ - | sepal_length_in_cm | sepal_width_in_cm | petal_length_in_cm | petal_width_in_cm | ClusterID | - |--------------------+-------------------+--------------------+-------------------+-----------| - | 5.1 | 3.5 | 1.4 | 0.2 | 1 | - | 5.6 | 3.0 | 4.1 | 1.3 | 0 | - | 6.7 | 2.5 | 5.8 | 1.8 | 2 | - +--------------------+-------------------+--------------------+-------------------+-----------+ - - -Limitations -=========== -The ``kmeans`` command can only work with ``plugins.calcite.enabled=false``. \ No newline at end of file diff --git a/docs/user/ppl/cmd/lookup.md b/docs/user/ppl/cmd/lookup.md new file mode 100644 index 00000000000..03683cdc47b --- /dev/null +++ b/docs/user/ppl/cmd/lookup.md @@ -0,0 +1,339 @@ +# lookup

## Description

The `lookup` command enriches your search data by adding or replacing data from a lookup index (dimension table). You can extend fields of an index with values from a dimension table, and append or replace values when the lookup condition is matched. As an alternative to the `join` command, the `lookup` command is more suitable for enriching the source data with a static dataset.

## Syntax

lookup \<lookupIndex\> (\<lookupMappingField\> [as \<sourceMappingField\>])... [(replace \| append) (\<inputField\> [as \<outputField\>])...]
* lookupIndex: mandatory. The name of the lookup index (dimension table).
* lookupMappingField: mandatory. A mapping key in `lookupIndex`, analogous to a join key from the right table. You can specify multiple `lookupMappingField` values, comma-delimited.
* sourceMappingField: optional. A mapping key from the source (left side), analogous to a join key from the left side. If not specified, defaults to `lookupMappingField`.
* inputField: optional. A field in `lookupIndex` whose matched values are applied to the result output. You can specify multiple `inputField` values, comma-delimited. If not specified, all fields except `lookupMappingField` from `lookupIndex` are applied to the result output.
* outputField: optional. A field of the output. You can specify zero or multiple `outputField` values. If `outputField` has an existing field name in the source query, its values will be replaced or appended by matched values from `inputField`. If the field specified in `outputField` is a new field, the replace strategy adds the new field to the results, while the append strategy fails.
* replace \| append: optional. The output strategies. With replace, matched values in the `lookupIndex` field overwrite the values in the result. With append, matched values in the `lookupIndex` field are only appended to the missing values in the result. **Default:** replace.

## Usage

Lookup

```
source = table1 | lookup table2 id
source = table1 | lookup table2 id, name
source = table1 | lookup table2 id as cid, name
source = table1 | lookup table2 id as cid, name replace dept as department
source = table1 | lookup table2 id as cid, name replace dept as department, city as location
source = table1 | lookup table2 id as cid, name append dept as department
source = table1 | lookup table2 id as cid, name append dept as department, city as location
```
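As a quick sketch of the two strategies (assuming the same `worker` and `work_information` indices used in the examples below), the only difference is whether existing non-null values on the left side are overwritten:

```ppl
source = worker
| LOOKUP work_information uid AS id APPEND department
| fields id, name, department
```

With `APPEND`, a worker's existing `department` value is kept and only missing (null) values are filled from the lookup index; with `REPLACE`, matched lookup values overwrite it. Output is omitted here since it depends on the index contents.

## Example 1: Replace strategy

This example shows using the lookup command with the REPLACE strategy to overwrite existing values.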
+ +```bash ignore +curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "query" : """ + source = worker + | LOOKUP work_information uid AS id REPLACE department + | fields id, name, occupation, country, salary, department + """ +}' +``` + +Result set + +```json +{ + "schema": [ + { + "name": "id", + "type": "integer" + }, + { + "name": "name", + "type": "string" + }, + { + "name": "occupation", + "type": "string" + }, + { + "name": "country", + "type": "string" + }, + { + "name": "salary", + "type": "integer" + }, + { + "name": "department", + "type": "string" + } + ], + "datarows": [ + [ + 1000, + "Jake", + "Engineer", + "England", + 100000, + "IT" + ], + [ + 1001, + "Hello", + "Artist", + "USA", + 70000, + null + ], + [ + 1002, + "John", + "Doctor", + "Canada", + 120000, + "DATA" + ], + [ + 1003, + "David", + "Doctor", + null, + 120000, + "HR" + ], + [ + 1004, + "David", + null, + "Canada", + 0, + null + ], + [ + 1005, + "Jane", + "Scientist", + "Canada", + 90000, + "DATA" + ] + ], + "total": 6, + "size": 6 +} +``` + +## Example 2: Append strategy + +This example shows using the lookup command with the APPEND strategy to fill missing values only. + +```bash ignore +curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "query" : """ + source = worker + | LOOKUP work_information uid AS id APPEND department + | fields id, name, occupation, country, salary, department + """ +}' +``` + +## Example 3: No inputField specified + +This example shows using the lookup command without specifying inputField, which applies all fields from the lookup index. + +```bash ignore +curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "query" : """ + source = worker + | LOOKUP work_information uid AS id, name + | fields id, name, occupation, country, salary, department + """ +}' +``` + +Result set + +```json +{ + "schema": [ + { + "name": "id", + "type": "integer" + }, + { + "name": "name", + "type": "string" + }, + { + "name": "country", + "type": "string" + }, + { + "name": "salary", + "type": "integer" + }, + { + "name": "department", + "type": "string" + }, + { + "name": "occupation", + "type": "string" + } + ], + "datarows": [ + [ + 1000, + "Jake", + "England", + 100000, + "IT", + "Engineer" + ], + [ + 1001, + "Hello", + "USA", + 70000, + null, + null + ], + [ + 1002, + "John", + "Canada", + 120000, + "DATA", + "Scientist" + ], + [ + 1003, + "David", + null, + 120000, + "HR", + "Doctor" + ], + [ + 1004, + "David", + "Canada", + 0, + null, + null + ], + [ + 1005, + "Jane", + "Canada", + 90000, + "DATA", + "Engineer" + ] + ], + "total": 6, + "size": 6 +} +``` + +## Example 4: OutputField as a new field + +This example shows using the lookup command with outputField as a new field name. 
+ +```bash ignore +curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "query" : """ + source = worker + | LOOKUP work_information name REPLACE occupation AS new_col + | fields id, name, occupation, country, salary, new_col + """ +}' +``` + +Result set + +```json +{ + "schema": [ + { + "name": "id", + "type": "integer" + }, + { + "name": "name", + "type": "string" + }, + { + "name": "occupation", + "type": "string" + }, + { + "name": "country", + "type": "string" + }, + { + "name": "salary", + "type": "integer" + }, + { + "name": "new_col", + "type": "string" + } + ], + "datarows": [ + [ + 1003, + "David", + "Doctor", + null, + 120000, + "Doctor" + ], + [ + 1004, + "David", + null, + "Canada", + 0, + "Doctor" + ], + [ + 1001, + "Hello", + "Artist", + "USA", + 70000, + null + ], + [ + 1000, + "Jake", + "Engineer", + "England", + 100000, + "Engineer" + ], + [ + 1005, + "Jane", + "Scientist", + "Canada", + 90000, + "Engineer" + ], + [ + 1002, + "John", + "Doctor", + "Canada", + 120000, + "Scientist" + ] + ], + "total": 6, + "size": 6 +} +``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/lookup.rst b/docs/user/ppl/cmd/lookup.rst deleted file mode 100644 index 4d4cf84a48b..00000000000 --- a/docs/user/ppl/cmd/lookup.rst +++ /dev/null @@ -1,350 +0,0 @@ -====== -lookup -====== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``lookup`` command enriches your search data by adding or replacing data from a lookup index (dimension table). You can extend fields of an index with values from a dimension table, append or replace values when lookup condition is matched. As an alternative of join command, lookup command is more suitable for enriching the source data with a static dataset. - -Syntax -====== -lookup ( [as ])... [(replace | append) ( [as ])...] - -* lookupIndex: mandatory. The name of lookup index (dimension table). -* lookupMappingField: mandatory. A mapping key in ``lookupIndex``, analogy to a join key from right table. You can specify multiple ``lookupMappingField`` with comma-delimited. -* sourceMappingField: optional. A mapping key from source (left side), analogy to a join key from left side. If not specified, defaults to ``lookupMappingField``. -* inputField: optional. A field in ``lookupIndex`` where matched values are applied to result output. You can specify multiple ``inputField`` with comma-delimited. If not specified, all fields except ``lookupMappingField`` from ``lookupIndex`` are applied to result output. -* outputField: optional. A field of output. You can specify zero or multiple ``outputField``. If ``outputField`` has an existing field name in source query, its values will be replaced or appended by matched values from ``inputField``. If the field specified in ``outputField`` is a new field, in replace strategy, an extended new field will be applied to the results, but fail in append strategy. -* replace | append: optional. The output strategies. If replace, matched values in ``lookupIndex`` field overwrite the values in result. If append, matched values in ``lookupIndex`` field only append to the missing values in result. **Default:** replace. 
- -Usage -===== - -Lookup:: - - source = table1 | lookup table2 id - source = table1 | lookup table2 id, name - source = table1 | lookup table2 id as cid, name - source = table1 | lookup table2 id as cid, name replace dept as department - source = table1 | lookup table2 id as cid, name replace dept as department, city as location - source = table1 | lookup table2 id as cid, name append dept as department - source = table1 | lookup table2 id as cid, name append dept as department, city as location - - -Example 1: Replace strategy -=========================== - -This example shows using the lookup command with the REPLACE strategy to overwrite existing values. - -PPL query:: - - >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ - "query" : """ - source = worker - | LOOKUP work_information uid AS id REPLACE department - | fields id, name, occupation, country, salary, department - """ - }' - -Result set:: - - { - "schema": [ - { - "name": "id", - "type": "integer" - }, - { - "name": "name", - "type": "string" - }, - { - "name": "occupation", - "type": "string" - }, - { - "name": "country", - "type": "string" - }, - { - "name": "salary", - "type": "integer" - }, - { - "name": "department", - "type": "string" - } - ], - "datarows": [ - [ - 1000, - "Jake", - "Engineer", - "England", - 100000, - "IT" - ], - [ - 1001, - "Hello", - "Artist", - "USA", - 70000, - null - ], - [ - 1002, - "John", - "Doctor", - "Canada", - 120000, - "DATA" - ], - [ - 1003, - "David", - "Doctor", - null, - 120000, - "HR" - ], - [ - 1004, - "David", - null, - "Canada", - 0, - null - ], - [ - 1005, - "Jane", - "Scientist", - "Canada", - 90000, - "DATA" - ] - ], - "total": 6, - "size": 6 - } - -Example 2: Append strategy -========================== - -This example shows using the lookup command with the APPEND strategy to fill missing values only. - -PPL query:: - - >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ - "query" : """ - source = worker - | LOOKUP work_information uid AS id APPEND department - | fields id, name, occupation, country, salary, department - """ - }' - - -Example 3: No inputField specified -================================== - -This example shows using the lookup command without specifying inputField, which applies all fields from the lookup index. 
- -PPL query:: - - >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ - "query" : """ - source = worker - | LOOKUP work_information uid AS id, name - | fields id, name, occupation, country, salary, department - """ - }' - -Result set:: - - { - "schema": [ - { - "name": "id", - "type": "integer" - }, - { - "name": "name", - "type": "string" - }, - { - "name": "country", - "type": "string" - }, - { - "name": "salary", - "type": "integer" - }, - { - "name": "department", - "type": "string" - }, - { - "name": "occupation", - "type": "string" - } - ], - "datarows": [ - [ - 1000, - "Jake", - "England", - 100000, - "IT", - "Engineer" - ], - [ - 1001, - "Hello", - "USA", - 70000, - null, - null - ], - [ - 1002, - "John", - "Canada", - 120000, - "DATA", - "Scientist" - ], - [ - 1003, - "David", - null, - 120000, - "HR", - "Doctor" - ], - [ - 1004, - "David", - "Canada", - 0, - null, - null - ], - [ - 1005, - "Jane", - "Canada", - 90000, - "DATA", - "Engineer" - ] - ], - "total": 6, - "size": 6 - } - -Example 4: OutputField as a new field -===================================== - -This example shows using the lookup command with outputField as a new field name. - -PPL query:: - - >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ - "query" : """ - source = worker - | LOOKUP work_information name REPLACE occupation AS new_col - | fields id, name, occupation, country, salary, new_col - """ - }' - -Result set:: - - { - "schema": [ - { - "name": "id", - "type": "integer" - }, - { - "name": "name", - "type": "string" - }, - { - "name": "occupation", - "type": "string" - }, - { - "name": "country", - "type": "string" - }, - { - "name": "salary", - "type": "integer" - }, - { - "name": "new_col", - "type": "string" - } - ], - "datarows": [ - [ - 1003, - "David", - "Doctor", - null, - 120000, - "Doctor" - ], - [ - 1004, - "David", - null, - "Canada", - 0, - "Doctor" - ], - [ - 1001, - "Hello", - "Artist", - "USA", - 70000, - null - ], - [ - 1000, - "Jake", - "Engineer", - "England", - 100000, - "Engineer" - ], - [ - 1005, - "Jane", - "Scientist", - "Canada", - 90000, - "Engineer" - ], - [ - 1002, - "John", - "Doctor", - "Canada", - 120000, - "Scientist" - ] - ], - "total": 6, - "size": 6 - } - diff --git a/docs/user/ppl/cmd/ml.md b/docs/user/ppl/cmd/ml.md new file mode 100644 index 00000000000..38098954bfb --- /dev/null +++ b/docs/user/ppl/cmd/ml.md @@ -0,0 +1,153 @@ +# ml + +## Description + +Use the `ml` command to train/predict/train and predict on any algorithm in the ml-commons plugin on the search result returned by a PPL command. +## Syntax + +## AD - Fixed In Time RCF For Time-series Data: + +ml action='train' algorithm='rcf' \ \ \ \ \ \ \ \ \ +* number_of_trees: optional integer. Number of trees in the forest. **Default:** 30. +* shingle_size: optional integer. A shingle is a consecutive sequence of the most recent records. **Default:** 8. +* sample_size: optional integer. The sample size used by stream samplers in this forest. **Default:** 256. +* output_after: optional integer. The number of points required by stream samplers before results are returned. **Default:** 32. +* time_decay: optional double. The decay factor used by stream samplers in this forest. **Default:** 0.0001. +* anomaly_rate: optional double. The anomaly rate. **Default:** 0.005. +* time_field: mandatory string. It specifies the time field for RCF to use as time-series data. +* date_format: optional string. It's used for formatting time_field field. 
**Default:** "yyyy-MM-dd HH:mm:ss". +* time_zone: optional string. It's used for setting time zone for time_field field. **Default:** UTC. +* category_field: optional string. It specifies the category field used to group inputs. Each category will be independently predicted. + +## AD - Batch RCF for Non-time-series Data: + +ml action='train' algorithm='rcf' \ \ \ \ \ +* number_of_trees: optional integer. Number of trees in the forest. **Default:** 30. +* sample_size: optional integer. Number of random samples given to each tree from the training data set. **Default:** 256. +* output_after: optional integer. The number of points required by stream samplers before results are returned. **Default:** 32. +* training_data_size: optional integer. **Default:** size of your training data set. +* anomaly_score_threshold: optional double. The threshold of anomaly score. **Default:** 1.0. +* category_field: optional string. It specifies the category field used to group inputs. Each category will be independently predicted. + +## KMEANS: + +ml action='train' algorithm='kmeans' \ \ \ +* centroids: optional integer. The number of clusters you want to group your data points into. **Default:** 2. +* iterations: optional integer. Number of iterations. **Default:** 10. +* distance_type: optional string. The distance type can be COSINE, L1, or EUCLIDEAN. **Default:** EUCLIDEAN. + +## Example 1: Detecting events in New York City from taxi ridership data with time-series data + +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data. + +```ppl +source=nyc_taxi +| fields value, timestamp +| ml action='train' algorithm='rcf' time_field='timestamp' +| where value=10844.0 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+---------------------+-------+---------------+ +| value | timestamp | score | anomaly_grade | +|---------+---------------------+-------+---------------| +| 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | ++---------+---------------------+-------+---------------+ +``` + +## Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category + +This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. + +```ppl +source=nyc_taxi +| fields category, value, timestamp +| ml action='train' algorithm='rcf' time_field='timestamp' category_field='category' +| where value=10844.0 or value=6526.0 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------+---------+---------------------+-------+---------------+ +| category | value | timestamp | score | anomaly_grade | +|----------+---------+---------------------+-------+---------------| +| night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | +| day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | ++----------+---------+---------------------+-------+---------------+ +``` + +## Example 3: Detecting events in New York City from taxi ridership data with non-time-series data + +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. 
+ +```ppl +source=nyc_taxi +| fields value +| ml action='train' algorithm='rcf' +| where value=10844.0 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+-------+-----------+ +| value | score | anomalous | +|---------+-------+-----------| +| 10844.0 | 0.0 | False | ++---------+-------+-----------+ +``` + +## Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category + +This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. + +```ppl +source=nyc_taxi +| fields category, value +| ml action='train' algorithm='rcf' category_field='category' +| where value=10844.0 or value=6526.0 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------+---------+-------+-----------+ +| category | value | score | anomalous | +|----------+---------+-------+-----------| +| night | 10844.0 | 0.0 | False | +| day | 6526.0 | 0.0 | False | ++----------+---------+-------+-----------+ +``` + +## Example 5: KMEANS - Clustering of Iris Dataset + +This example shows how to use KMEANS to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals. + +```ppl +source=iris_data +| fields sepal_length_in_cm, sepal_width_in_cm, petal_length_in_cm, petal_width_in_cm +| ml action='train' algorithm='kmeans' centroids=3 +``` + +Expected output: + +```text ++--------------------+-------------------+--------------------+-------------------+-----------+ +| sepal_length_in_cm | sepal_width_in_cm | petal_length_in_cm | petal_width_in_cm | ClusterID | +|--------------------+-------------------+--------------------+-------------------+-----------| +| 5.1 | 3.5 | 1.4 | 0.2 | 1 | +| 5.6 | 3.0 | 4.1 | 1.3 | 0 | +| 6.7 | 2.5 | 5.8 | 1.8 | 2 | ++--------------------+-------------------+--------------------+-------------------+-----------+ +``` + +## Limitations + +The `ml` command can only work with `plugins.calcite.enabled=false`. \ No newline at end of file diff --git a/docs/user/ppl/cmd/ml.rst b/docs/user/ppl/cmd/ml.rst deleted file mode 100644 index 371df4de880..00000000000 --- a/docs/user/ppl/cmd/ml.rst +++ /dev/null @@ -1,138 +0,0 @@ -== -ml -== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| Use the ``ml`` command to train/predict/train and predict on any algorithm in the ml-commons plugin on the search result returned by a PPL command. - -Syntax -====== - -AD - Fixed In Time RCF For Time-series Data: --------------------------------------------- - -ml action='train' algorithm='rcf' - -* number_of_trees: optional integer. Number of trees in the forest. **Default:** 30. -* shingle_size: optional integer. A shingle is a consecutive sequence of the most recent records. **Default:** 8. -* sample_size: optional integer. The sample size used by stream samplers in this forest. **Default:** 256. -* output_after: optional integer. The number of points required by stream samplers before results are returned. **Default:** 32. -* time_decay: optional double. The decay factor used by stream samplers in this forest. **Default:** 0.0001. -* anomaly_rate: optional double. The anomaly rate. **Default:** 0.005. -* time_field: mandatory string. It specifies the time field for RCF to use as time-series data. -* date_format: optional string. 
It's used for formatting time_field field. **Default:** "yyyy-MM-dd HH:mm:ss". -* time_zone: optional string. It's used for setting time zone for time_field field. **Default:** UTC. -* category_field: optional string. It specifies the category field used to group inputs. Each category will be independently predicted. - -AD - Batch RCF for Non-time-series Data: ----------------------------------------- - -ml action='train' algorithm='rcf' - -* number_of_trees: optional integer. Number of trees in the forest. **Default:** 30. -* sample_size: optional integer. Number of random samples given to each tree from the training data set. **Default:** 256. -* output_after: optional integer. The number of points required by stream samplers before results are returned. **Default:** 32. -* training_data_size: optional integer. **Default:** size of your training data set. -* anomaly_score_threshold: optional double. The threshold of anomaly score. **Default:** 1.0. -* category_field: optional string. It specifies the category field used to group inputs. Each category will be independently predicted. - -KMEANS: -------- - -ml action='train' algorithm='kmeans' - -* centroids: optional integer. The number of clusters you want to group your data points into. **Default:** 2. -* iterations: optional integer. Number of iterations. **Default:** 10. -* distance_type: optional string. The distance type can be COSINE, L1, or EUCLIDEAN. **Default:** EUCLIDEAN. - -Example 1: Detecting events in New York City from taxi ridership data with time-series data -=========================================================================================== - -This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data. - -PPL query:: - - os> source=nyc_taxi | fields value, timestamp | ml action='train' algorithm='rcf' time_field='timestamp' | where value=10844.0 - fetched rows / total rows = 1/1 - +---------+---------------------+-------+---------------+ - | value | timestamp | score | anomaly_grade | - |---------+---------------------+-------+---------------| - | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - +---------+---------------------+-------+---------------+ - -Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category -============================================================================================================================ - -This example trains an RCF model and uses the model to detect anomalies in the time-series ridership data with multiple category values. - -PPL query:: - - os> source=nyc_taxi | fields category, value, timestamp | ml action='train' algorithm='rcf' time_field='timestamp' category_field='category' | where value=10844.0 or value=6526.0 - fetched rows / total rows = 2/2 - +----------+---------+---------------------+-------+---------------+ - | category | value | timestamp | score | anomaly_grade | - |----------+---------+---------------------+-------+---------------| - | night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - | day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | - +----------+---------+---------------------+-------+---------------+ - - -Example 3: Detecting events in New York City from taxi ridership data with non-time-series data -=============================================================================================== - -This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data. 
- -PPL query:: - - os> source=nyc_taxi | fields value | ml action='train' algorithm='rcf' | where value=10844.0 - fetched rows / total rows = 1/1 - +---------+-------+-----------+ - | value | score | anomalous | - |---------+-------+-----------| - | 10844.0 | 0.0 | False | - +---------+-------+-----------+ - -Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category -================================================================================================================================ - -This example trains an RCF model and uses the model to detect anomalies in the non-time-series ridership data with multiple category values. - -PPL query:: - - os> source=nyc_taxi | fields category, value | ml action='train' algorithm='rcf' category_field='category' | where value=10844.0 or value=6526.0 - fetched rows / total rows = 2/2 - +----------+---------+-------+-----------+ - | category | value | score | anomalous | - |----------+---------+-------+-----------| - | night | 10844.0 | 0.0 | False | - | day | 6526.0 | 0.0 | False | - +----------+---------+-------+-----------+ - -Example 5: KMEANS - Clustering of Iris Dataset -=============================================== - -This example shows how to use KMEANS to classify three Iris species (Iris setosa, Iris virginica and Iris versicolor) based on the combination of four features measured from each sample: the length and the width of the sepals and petals. - -PPL query:: - - os> source=iris_data | fields sepal_length_in_cm, sepal_width_in_cm, petal_length_in_cm, petal_width_in_cm | ml action='train' algorithm='kmeans' centroids=3 - +--------------------+-------------------+--------------------+-------------------+-----------+ - | sepal_length_in_cm | sepal_width_in_cm | petal_length_in_cm | petal_width_in_cm | ClusterID | - |--------------------+-------------------+--------------------+-------------------+-----------| - | 5.1 | 3.5 | 1.4 | 0.2 | 1 | - | 5.6 | 3.0 | 4.1 | 1.3 | 0 | - | 6.7 | 2.5 | 5.8 | 1.8 | 2 | - +--------------------+-------------------+--------------------+-------------------+-----------+ - - -Limitations -=========== -The ``ml`` command can only work with ``plugins.calcite.enabled=false``. diff --git a/docs/user/ppl/cmd/multisearch.md b/docs/user/ppl/cmd/multisearch.md new file mode 100644 index 00000000000..0b6e8ae208e --- /dev/null +++ b/docs/user/ppl/cmd/multisearch.md @@ -0,0 +1,152 @@ +# multisearch + +## Description + +Use the `multisearch` command to run multiple search subsearches and merge their results together. The command allows you to combine data from different queries on the same or different sources, and optionally apply subsequent processing to the combined result set. +Key aspects of `multisearch`: +1. Combines results from multiple search operations into a single result set. +2. Each subsearch can have different filtering criteria, data transformations, and field selections. +3. Results are merged and can be further processed with aggregations, sorting, and other PPL commands. +4. Particularly useful for comparative analysis, union operations, and creating comprehensive datasets from multiple search criteria. +5. Supports timestamp-based result interleaving when working with time-series data. + +Use Cases: +* **Comparative Analysis**: Compare metrics across different segments, regions, or time periods +* **Success Rate Monitoring**: Calculate success rates by comparing successful vs. 
total operations
* **Multi-source Data Combination**: Merge data from different indices or apply different filters to the same source
* **A/B Testing Analysis**: Combine results from different test groups for comparison
* **Time-series Data Merging**: Interleave events from multiple sources based on timestamps

## Syntax

multisearch \<subsearch1\> \<subsearch2\> \<subsearch3\>...
* subsearch1, subsearch2, ...: mandatory. At least two subsearches are required. Each subsearch must be enclosed in square brackets and start with the `search` keyword. Format: `[search source=index | commands...]`. All PPL commands are supported within subsearches.
* result-processing: optional. Commands applied to the merged results after the multisearch operation, such as `stats`, `sort`, `head`, etc.

## Usage

Basic multisearch

```
| multisearch [search source=table | where condition1] [search source=table | where condition2]
| multisearch [search source=index1 | fields field1, field2] [search source=index2 | fields field1, field2]
| multisearch [search source=table | where status="success"] [search source=table | where status="error"]
```

## Example 1: Basic Age Group Analysis

This example combines young and adult customers into a single result set for further analysis.

```ppl
| multisearch [search source=accounts
| where age < 30
| eval age_group = "young"
| fields firstname, age, age_group] [search source=accounts
| where age >= 30
| eval age_group = "adult"
| fields firstname, age, age_group]
| sort age
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------+-----+-----------+
| firstname | age | age_group |
|-----------+-----+-----------|
| Nanette   | 28  | young     |
| Amber     | 32  | adult     |
| Dale      | 33  | adult     |
| Hattie    | 36  | adult     |
+-----------+-----+-----------+
```

## Example 2: Success Rate Pattern

This example combines high-balance and regular accounts for comparison analysis.

```ppl
| multisearch [search source=accounts
| where balance > 20000
| eval query_type = "high_balance"
| fields firstname, balance, query_type] [search source=accounts
| where balance > 0 AND balance <= 20000
| eval query_type = "regular"
| fields firstname, balance, query_type]
| sort balance desc
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------+---------+--------------+
| firstname | balance | query_type   |
|-----------+---------+--------------|
| Amber     | 39225   | high_balance |
| Nanette   | 32838   | high_balance |
| Hattie    | 5686    | regular      |
| Dale      | 4180    | regular      |
+-----------+---------+--------------+
```
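Because the merged result is a regular result set, it can feed straight into aggregation commands. A small sketch building on the data from Example 1 (same assumed `accounts` index; the counts depend on the data, so output is omitted):

```ppl
| multisearch [search source=accounts | where age < 30 | eval age_group = "young"] [search source=accounts | where age >= 30 | eval age_group = "adult"]
| stats count() by age_group
```

## Example 3: Timestamp Interleaving

This example combines time-series data from multiple sources with automatic timestamp-based ordering.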
+ +```ppl +| multisearch [search source=time_data +| where category IN ("A", "B")] [search source=time_data2 +| where category IN ("E", "F")] +| fields @timestamp, category, value, timestamp +| head 5 +``` + +Expected output: + +```text +fetched rows / total rows = 5/5 ++---------------------+----------+-------+---------------------+ +| @timestamp | category | value | timestamp | +|---------------------+----------+-------+---------------------| +| 2025-08-01 04:00:00 | E | 2001 | 2025-08-01 04:00:00 | +| 2025-08-01 03:47:41 | A | 8762 | 2025-08-01 03:47:41 | +| 2025-08-01 02:30:00 | F | 2002 | 2025-08-01 02:30:00 | +| 2025-08-01 01:14:11 | B | 9015 | 2025-08-01 01:14:11 | +| 2025-08-01 01:00:00 | E | 2003 | 2025-08-01 01:00:00 | ++---------------------+----------+-------+---------------------+ +``` + +## Example 4: Type Compatibility - Missing Fields + +This example demonstrates how missing fields are handled with NULL insertion. + +```ppl +| multisearch [search source=accounts +| where age < 30 +| eval young_flag = "yes" +| fields firstname, age, young_flag] [search source=accounts +| where age >= 30 +| fields firstname, age] +| sort age +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+------------+ +| firstname | age | young_flag | +|-----------+-----+------------| +| Nanette | 28 | yes | +| Amber | 32 | null | +| Dale | 33 | null | +| Hattie | 36 | null | ++-----------+-----+------------+ +``` + +## Limitations + +* **Minimum Subsearches**: At least two subsearches must be specified +* **Schema Compatibility**: When fields with the same name exist across subsearches but have incompatible types, the system automatically resolves conflicts by renaming the conflicting fields. The first occurrence retains the original name, while subsequent conflicting fields are renamed with a numeric suffix (e.g., `age` becomes `age0`, `age1`, etc.). This ensures all data is preserved while maintaining schema consistency. \ No newline at end of file diff --git a/docs/user/ppl/cmd/multisearch.rst b/docs/user/ppl/cmd/multisearch.rst deleted file mode 100644 index ed1e092c8af..00000000000 --- a/docs/user/ppl/cmd/multisearch.rst +++ /dev/null @@ -1,126 +0,0 @@ -=========== -multisearch -=========== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| Use the ``multisearch`` command to run multiple search subsearches and merge their results together. The command allows you to combine data from different queries on the same or different sources, and optionally apply subsequent processing to the combined result set. - -| Key aspects of ``multisearch``: - -1. Combines results from multiple search operations into a single result set. -2. Each subsearch can have different filtering criteria, data transformations, and field selections. -3. Results are merged and can be further processed with aggregations, sorting, and other PPL commands. -4. Particularly useful for comparative analysis, union operations, and creating comprehensive datasets from multiple search criteria. -5. Supports timestamp-based result interleaving when working with time-series data. - -| Use Cases: - -* **Comparative Analysis**: Compare metrics across different segments, regions, or time periods -* **Success Rate Monitoring**: Calculate success rates by comparing successful vs. 
total operations -* **Multi-source Data Combination**: Merge data from different indices or apply different filters to the same source -* **A/B Testing Analysis**: Combine results from different test groups for comparison -* **Time-series Data Merging**: Interleave events from multiple sources based on timestamps - -Syntax -====== -multisearch ... - -* subsearch1, subsearch2, ...: mandatory. At least two subsearches required. Each subsearch must be enclosed in square brackets and start with the ``search`` keyword. Format: ``[search source=index | commands...]``. All PPL commands are supported within subsearches. -* result-processing: optional. Commands applied to the merged results after the multisearch operation, such as ``stats``, ``sort``, ``head``, etc. - -Usage -===== - -Basic multisearch:: - - | multisearch [search source=table | where condition1] [search source=table | where condition2] - | multisearch [search source=index1 | fields field1, field2] [search source=index2 | fields field1, field2] - | multisearch [search source=table | where status="success"] [search source=table | where status="error"] - -Example 1: Basic Age Group Analysis -=================================== - -This example combines young and adult customers into a single result set for further analysis. - -PPL query:: - - os> | multisearch [search source=accounts | where age < 30 | eval age_group = "young" | fields firstname, age, age_group] [search source=accounts | where age >= 30 | eval age_group = "adult" | fields firstname, age, age_group] | sort age; - fetched rows / total rows = 4/4 - +-----------+-----+-----------+ - | firstname | age | age_group | - |-----------+-----+-----------| - | Nanette | 28 | young | - | Amber | 32 | adult | - | Dale | 33 | adult | - | Hattie | 36 | adult | - +-----------+-----+-----------+ - -Example 2: Success Rate Pattern -=============================== - -This example combines high-balance and all valid accounts for comparison analysis. - -PPL query:: - - os> | multisearch [search source=accounts | where balance > 20000 | eval query_type = "high_balance" | fields firstname, balance, query_type] [search source=accounts | where balance > 0 AND balance <= 20000 | eval query_type = "regular" | fields firstname, balance, query_type] | sort balance desc; - fetched rows / total rows = 4/4 - +-----------+---------+--------------+ - | firstname | balance | query_type | - |-----------+---------+--------------| - | Amber | 39225 | high_balance | - | Nanette | 32838 | high_balance | - | Hattie | 5686 | regular | - | Dale | 4180 | regular | - +-----------+---------+--------------+ - -Example 3: Timestamp Interleaving -================================= - -This example combines time-series data from multiple sources with automatic timestamp-based ordering. 
- -PPL query:: - - os> | multisearch [search source=time_data | where category IN ("A", "B")] [search source=time_data2 | where category IN ("E", "F")] | fields @timestamp, category, value, timestamp | head 5; - fetched rows / total rows = 5/5 - +---------------------+----------+-------+---------------------+ - | @timestamp | category | value | timestamp | - |---------------------+----------+-------+---------------------| - | 2025-08-01 04:00:00 | E | 2001 | 2025-08-01 04:00:00 | - | 2025-08-01 03:47:41 | A | 8762 | 2025-08-01 03:47:41 | - | 2025-08-01 02:30:00 | F | 2002 | 2025-08-01 02:30:00 | - | 2025-08-01 01:14:11 | B | 9015 | 2025-08-01 01:14:11 | - | 2025-08-01 01:00:00 | E | 2003 | 2025-08-01 01:00:00 | - +---------------------+----------+-------+---------------------+ - -Example 4: Type Compatibility - Missing Fields -================================================= - -This example demonstrates how missing fields are handled with NULL insertion. - -PPL query:: - - os> | multisearch [search source=accounts | where age < 30 | eval young_flag = "yes" | fields firstname, age, young_flag] [search source=accounts | where age >= 30 | fields firstname, age] | sort age; - fetched rows / total rows = 4/4 - +-----------+-----+------------+ - | firstname | age | young_flag | - |-----------+-----+------------| - | Nanette | 28 | yes | - | Amber | 32 | null | - | Dale | 33 | null | - | Hattie | 36 | null | - +-----------+-----+------------+ - - -Limitations -=========== - -* **Minimum Subsearches**: At least two subsearches must be specified -* **Schema Compatibility**: When fields with the same name exist across subsearches but have incompatible types, the system automatically resolves conflicts by renaming the conflicting fields. The first occurrence retains the original name, while subsequent conflicting fields are renamed with a numeric suffix (e.g., ``age`` becomes ``age0``, ``age1``, etc.). This ensures all data is preserved while maintaining schema consistency. diff --git a/docs/user/ppl/cmd/parse.md b/docs/user/ppl/cmd/parse.md new file mode 100644 index 00000000000..8e151ad888b --- /dev/null +++ b/docs/user/ppl/cmd/parse.md @@ -0,0 +1,133 @@ +# parse

## Description

The `parse` command parses a text field with a regular expression and appends the result to the search result.

## Syntax

parse \<field\> \<pattern\>
* field: mandatory. The field must be a text field.
* pattern: mandatory. The regular expression pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field.

## Regular Expression

The regular expression pattern is used to match the whole text field of each document with the Java regex engine. Each named capture group in the expression will become a new `STRING` field.
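A pattern can define several named groups at once, and each one becomes its own string field that later commands can reference. A hedged sketch against the `accounts` index used below (the `user` and `host` group names are chosen for illustration):

```ppl
source=accounts
| parse email '(?<user>[^@]+)@(?<host>.+)'
| fields email, user, host
```

## Example 1: Create a new field

This example shows how to create a new field `host` for each document. `host` will be the host name after `@` in `email` field. Parsing a null field will return an empty string.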
```ppl
source=accounts
| parse email '.+@(?<host>.+)'
| fields email, host
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------------------+------------+
| email                 | host       |
|-----------------------+------------|
| amberduke@pyrami.com  | pyrami.com |
| hattiebond@netagy.com | netagy.com |
| null                  |            |
| daleadams@boink.com   | boink.com  |
+-----------------------+------------+
```

## Example 2: Override an existing field

This example shows how to override the existing `address` field with the street number removed.

```ppl
source=accounts
| parse address '\d+ (?<street>.+)'
| fields address
```

Expected output:

```text
fetched rows / total rows = 4/4
+------------------+
| address          |
|------------------|
| Holmes Lane      |
| Bristol Street   |
| Madison Street   |
| Hutchinson Court |
+------------------+
```

## Example 3: Filter and sort by casted parsed field

This example shows how to sort street numbers that are higher than 500 in the `address` field.

```ppl
source=accounts
| parse address '(?<streetNumber>\d+) (?<street>.+)'
| where cast(streetNumber as int) > 500
| sort num(streetNumber)
| fields streetNumber, street
```

Expected output:

```text
fetched rows / total rows = 3/3
+--------------+----------------+
| streetNumber | street         |
|--------------+----------------|
| 671          | Bristol Street |
| 789          | Madison Street |
| 880          | Holmes Lane    |
+--------------+----------------+
```

## Limitations

There are a few limitations with the parse command:
- Fields defined by parse cannot be parsed again.

The following command will not work:

```
source=accounts | parse address '\d+ (?<street>.+)' | parse street '\w+ (?<street2>\w+)' ;
```

- Fields defined by parse cannot be overridden with other commands.

`where` will not match any documents since `street` cannot be overridden:

```
source=accounts | parse address '\d+ (?<street>.+)' | eval street='1' | where street='1' ;
```

- The text field used by parse cannot be overridden.

`street` will not be successfully parsed since `address` is overridden:

```
source=accounts | parse address '\d+ (?<street>.+)' | eval address='1' ;
```

- Fields defined by parse cannot be filtered/sorted after using them in the `stats` command.

`where` in the following command will not work:

```
source=accounts | parse email '.+@(?<host>.+)' | stats avg(age) by host | where host=pyrami.com ;
```

- Fields defined by parse will not appear in the final result unless the original source field is included in the `fields` command.

For example, the following query will not display the parsed field `host` unless the source field `email` is also explicitly included:

```
source=accounts | parse email '.+@(?<host>.+)' | fields email, host ;
```

- Named capture group must start with a letter and contain only letters and digits.

For detailed Java regex pattern syntax and usage, refer to the [official Java Pattern documentation](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html). \ No newline at end of file diff --git a/docs/user/ppl/cmd/parse.rst b/docs/user/ppl/cmd/parse.rst deleted file mode 100644 index 833736238b9..00000000000 --- a/docs/user/ppl/cmd/parse.rst +++ /dev/null @@ -1,119 +0,0 @@ -===== -parse -===== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``parse`` command parses a text field with a regular expression and appends the result to the search result. - - -Syntax -====== -parse - -* field: mandatory. The field must be a text field. -* pattern: mandatory. The regular expression pattern used to extract new fields from the given text field. If a new field name already exists, it will replace the original field. - -Regular Expression -================== -The regular expression pattern is used to match the whole text field of each document with Java regex engine. Each named capture group in the expression will become a new ``STRING`` field. - -Example 1: Create a new field -============================= - -This example shows how to create a new field ``host`` for each document.
``host`` will be the host name after ``@`` in ``email`` field. Parsing a null field will return an empty string. - -PPL query:: - - os> source=accounts | parse email '.+@(?.+)' | fields email, host ; - fetched rows / total rows = 4/4 - +-----------------------+------------+ - | email | host | - |-----------------------+------------| - | amberduke@pyrami.com | pyrami.com | - | hattiebond@netagy.com | netagy.com | - | null | | - | daleadams@boink.com | boink.com | - +-----------------------+------------+ - - -Example 2: Override an existing field -===================================== - -This example shows how to override the existing ``address`` field with street number removed. - -PPL query:: - - os> source=accounts | parse address '\d+ (?
    .+)' | fields address ; - fetched rows / total rows = 4/4 - +------------------+ - | address | - |------------------| - | Holmes Lane | - | Bristol Street | - | Madison Street | - | Hutchinson Court | - +------------------+ - -Example 3: Filter and sort by casted parsed field -================================================= - -This example shows how to sort street numbers that are higher than 500 in ``address`` field. - -PPL query:: - - os> source=accounts | parse address '(?\d+) (?.+)' | where cast(streetNumber as int) > 500 | sort num(streetNumber) | fields streetNumber, street ; - fetched rows / total rows = 3/3 - +--------------+----------------+ - | streetNumber | street | - |--------------+----------------| - | 671 | Bristol Street | - | 789 | Madison Street | - | 880 | Holmes Lane | - +--------------+----------------+ - -Limitations -=========== - -There are a few limitations with parse command: - -- Fields defined by parse cannot be parsed again. - - The following command will not work:: - - source=accounts | parse address '\d+ (?.+)' | parse street '\w+ (?\w+)' ; - -- Fields defined by parse cannot be overridden with other commands. - - ``where`` will not match any documents since ``street`` cannot be overridden:: - - source=accounts | parse address '\d+ (?.+)' | eval street='1' | where street='1' ; - -- The text field used by parse cannot be overridden. - - ``street`` will not be successfully parsed since ``address`` is overridden:: - - source=accounts | parse address '\d+ (?.+)' | eval address='1' ; - -- Fields defined by parse cannot be filtered/sorted after using them in ``stats`` command. - - ``where`` in the following command will not work:: - - source=accounts | parse email '.+@(?.+)' | stats avg(age) by host | where host=pyrami.com ; - -- Fields defined by parse will not appear in the final result unless the original source field is included in the ``fields`` command. - - For example, the following query will not display the parsed fields ``host`` unless the source field ``email`` is also explicitly included:: - - source=accounts | parse email '.+@(?.+)' | fields email, host ; - -- Named capture group must start with a letter and contain only letters and digits. - - For detailed Java regex pattern syntax and usage, refer to the `official Java Pattern documentation `_ diff --git a/docs/user/ppl/cmd/patterns.md b/docs/user/ppl/cmd/patterns.md new file mode 100644 index 00000000000..7b9cb718891 --- /dev/null +++ b/docs/user/ppl/cmd/patterns.md @@ -0,0 +1,260 @@ +# patterns + +## Description + +The `patterns` command extracts log patterns from a text field and appends the results to the search result. Grouping logs by their patterns makes it easier to aggregate stats from large volumes of log data for analysis and troubleshooting. +`patterns` command allows users to select different log parsing algorithms to get high log pattern grouping accuracy. Two pattern methods are supported: `simple_pattern` and `brain`. +`simple_pattern` algorithm is basically a regex parsing method vs `brain` algorithm is an automatic log grouping algorithm with high grouping accuracy and keeps semantic meaning. +`patterns` command supports two modes: `label` and `aggregation`. `label` mode returns individual pattern labels. `aggregation` mode returns aggregated results on target field. +Calcite engine by default labels the variables with '\<*\>' placeholder. 
If the `show_numbered_token` option is turned on, the Calcite engine's `label` mode not only labels the pattern of the text but also labels the variable tokens in a map. In `aggregation` mode, it also outputs the labeled pattern as well as the variable tokens per pattern. The variable placeholder is in the format of '\<token1\>', '\<token2\>' ... '\<tokenN\>' instead of '<\*>'.

## Syntax

patterns \<field\> [by byClause...] [method=simple_pattern \| brain] [mode=label \| aggregation] [max_sample_count=integer] [buffer_limit=integer] [show_numbered_token=boolean] [new_field=\<new-field-name\>] (algorithm parameters...)
* field: mandatory. The text field to analyze for patterns.
* byClause: optional. Fields or scalar functions used to group logs for labeling/aggregation.
* method: optional. Algorithm choice: `simple_pattern` or `brain`. **Default:** `simple_pattern`.
* mode: optional. Output mode: `label` or `aggregation`. **Default:** `label`.
* max_sample_count: optional. Max sample logs returned per pattern in aggregation mode. **Default:** 10.
* buffer_limit: optional. Safeguard parameter for the `brain` algorithm that limits the internal temporary buffer size (min: 50,000). **Default:** 100,000.
* show_numbered_token: optional. The flag to turn on the numbered token output format. **Default:** false.
* new_field: optional. Alias of the output pattern field. **Default:** "patterns_field".
* algorithm parameters: optional. Algorithm-specific tuning:
  * `simple_pattern`: Define the regex via the "pattern" parameter.
  * `brain`: Adjust sensitivity with `variable_count_threshold` and `frequency_threshold_percentage`.
    * `variable_count_threshold`: optional integer. Words are split by space. The algorithm counts how many distinct words appear at a specific position in the initial log groups. Adjusting this threshold determines the sensitivity to constant words. **Default:** 5.
    * `frequency_threshold_percentage`: optional double. Brain's log pattern is selected based on the longest word combination. This sets the lower bound of frequency for ignoring low-frequency words. **Default:** 0.3.

## Change the default pattern method

To override the default pattern parameters, users can run the following command:

```
PUT _cluster/settings
{
  "persistent": {
    "plugins.ppl.pattern.method": "brain",
    "plugins.ppl.pattern.mode": "aggregation",
    "plugins.ppl.pattern.max.sample.count": 5,
    "plugins.ppl.pattern.buffer.limit": 50000,
    "plugins.ppl.pattern.show.numbered.token": true
  }
}
```

## Simple Pattern Example 1: Create the new field

This example shows how to extract patterns in `email` for each document. Parsing a null field will return an empty string.

```ppl
source=accounts
| patterns email method=simple_pattern
| fields email, patterns_field
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------------------+----------------+
| email                 | patterns_field |
|-----------------------+----------------|
| amberduke@pyrami.com  | <*>@<*>.<*>    |
| hattiebond@netagy.com | <*>@<*>.<*>    |
| null                  |                |
| daleadams@boink.com   | <*>@<*>.<*>    |
+-----------------------+----------------+
```

## Simple Pattern Example 2: Extract log patterns

This example shows how to extract patterns from a raw log field using the default patterns.
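As a side note, the `by` clause documented above composes with pattern extraction; a minimal, untested sketch that labels patterns per `gender` group on the same `accounts` data might look like this (the grouping field is an assumption for illustration):

```ppl
source=accounts
| patterns email by gender method=simple_pattern
| fields gender, email, patterns_field
```

The query below applies the default method to the raw `apache` access logs: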
+ +```ppl +source=apache +| patterns message method=simple_pattern +| fields message, patterns_field +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ +| message | patterns_field | +|-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------| +| 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>-<*>/<*> <*>/<*>.<*>" <*> <*> | +| 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>/<*>/<*> <*>/<*>.<*>" <*> <*> | +| 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>-<*>-<*>-<*> <*>/<*>.<*>" <*> <*> | +| 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*> <*>/<*>.<*>" <*> <*> | ++-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ +``` + +## Simple Pattern Example 3: Extract log patterns with custom regex pattern + +This example shows how to extract patterns from a raw log field using user defined patterns. 
+ +```ppl +source=apache +| patterns message method=simple_pattern new_field='no_numbers' pattern='[0-9]' +| fields message, no_numbers +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| message | no_numbers | +|-----------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*><*><*>.<*><*>.<*>.<*><*> - upton<*><*><*><*> [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "HEAD /e-business/mindshare HTTP/<*>.<*>" <*><*><*> <*><*><*><*><*> | +| 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*><*><*>.<*><*>.<*><*><*>.<*> - pouros<*><*><*><*> [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "GET /architectures/convergence/niches/mindshare HTTP/<*>.<*>" <*><*><*> <*><*><*><*><*> | +| 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*><*><*>.<*><*><*>.<*><*><*>.<*><*><*> - - [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "PATCH /strategize/out-of-the-box HTTP/<*>.<*>" <*><*><*> <*><*><*><*><*> | +| 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*><*><*>.<*><*><*>.<*><*>.<*><*><*> - - [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "POST /users HTTP/<*>.<*>" <*><*><*> <*><*><*><*> | ++-----------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +## Simple Pattern Example 4: Return log patterns aggregation result + +This example shows how to get aggregated results from a raw log field. 
+ +```ppl +source=apache +| patterns message method=simple_pattern mode=aggregation +| fields patterns_field, pattern_count, sample_logs +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------------------------------------------------------------------------------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------+ +| patterns_field | pattern_count | sample_logs | +|---------------------------------------------------------------------------------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------| +| <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*> <*>/<*>.<*>" <*> <*> | 1 | [210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481] | +| <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>-<*>-<*>-<*> <*>/<*>.<*>" <*> <*> | 1 | [118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439] | +| <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>-<*>/<*> <*>/<*>.<*>" <*> <*> | 1 | [177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927] | +| <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>/<*>/<*> <*>/<*>.<*>" <*> <*> | 1 | [127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722] | ++---------------------------------------------------------------------------------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------+ +``` + +## Simple Pattern Example 5: Return log patterns aggregation result with detected variable tokens + +This example shows how to get aggregated results with detected variable tokens. +## Configuration + +With option `show_numbered_token` enabled, the output can detect numbered variable tokens from the pattern field. 
+ +```ppl +source=apache +| patterns message method=simple_pattern mode=aggregation show_numbered_token=true +| fields patterns_field, pattern_count, tokens +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| patterns_field | pattern_count | tokens | +|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ... - - [//::: -] " / /." | 1 | {'': ['HTTP'], '': ['users'], '': ['1'], '': ['1'], '': ['9481'], '': ['301'], '': ['28'], '': ['104'], '': ['2022'], '': ['Sep'], '': ['15'], '': ['10'], '': ['57'], '': ['210'], '': ['POST'], '': ['15'], '': ['0700'], '': ['204']} | ++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +## Brain Example 1: Extract log patterns + +This example shows how to extract semantic meaningful log patterns from a raw log field using the brain algorithm. The default variable count threshold is 5. 
+ +```ppl +source=apache +| patterns message method=brain +| fields message, patterns_field +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+ +| message | patterns_field | +|-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------| +| 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] "HEAD /e-business/mindshare HTTP/<*>" 404 <*> | +| 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] "GET /architectures/convergence/niches/mindshare HTTP/<*>" 100 <*> | +| 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*IP*> - - [<*>/Sep/<*>:<*>:<*>:<*> <*>] "PATCH /strategize/out-of-the-box HTTP/<*>" 401 <*> | +| 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*IP*> - - [<*>/Sep/<*>:<*>:<*>:<*> <*>] "POST /users HTTP/<*>" 301 <*> | ++-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+ +``` + +## Brain Example 2: Extract log patterns with custom parameters + +This example shows how to extract semantic meaningful log patterns from a raw log field using custom parameters of the brain algorithm. 
+ +```ppl +source=apache +| patterns message method=brain variable_count_threshold=2 +| fields message, patterns_field +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------+ +| message | patterns_field | +|-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------| +| 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | +| 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | +| 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | +| 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | ++-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------+ +``` + +## Brain Example 3: Return log patterns aggregation result + +This example shows how to get aggregated results from a raw log field using the brain algorithm. + +```ppl +source=apache +| patterns message method=brain mode=aggregation variable_count_threshold=2 +| fields patterns_field, pattern_count, sample_logs +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| patterns_field | pattern_count | sample_logs | +|----------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | 4 | [177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927,127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722,118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439,210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481] | 
++----------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +## Brain Example 4: Return log patterns aggregation result with detected variable tokens + +This example shows how to get aggregated results with detected variable tokens using the brain algorithm. + +With option `show_numbered_token` enabled, the output can detect numbered variable tokens from the pattern field. + +```ppl +source=apache +| patterns message method=brain mode=aggregation show_numbered_token=true variable_count_threshold=2 +| fields patterns_field, pattern_count, tokens +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| patterns_field | pattern_count | tokens | +|----------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| - [/Sep/::: ] HTTP/" | 4 | {'': ['19927', '28722', '27439', '9481'], '': ['10', '10', '10', '10'], '': ['2022', '2022', '2022', '2022'], '': ['57', '57', '57', '57'], '': ['15', '15', '15', '15'], '': ['"HEAD', '"GET', '"PATCH', '"POST'], '': ['-0700', '-0700', '-0700', '-0700'], '': ['/e-business/mindshare', '/architectures/convergence/niches/mindshare', '/strategize/out-of-the-box', '/users'], '': ['177.95.8.74', '127.45.152.6', '118.223.210.10... 
| ++----------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +## Limitations + +- Patterns command is not pushed down to OpenSearch data node for now. It will only group log patterns on log messages returned to coordinator node. \ No newline at end of file diff --git a/docs/user/ppl/cmd/patterns.rst b/docs/user/ppl/cmd/patterns.rst deleted file mode 100644 index ec87aca7494..00000000000 --- a/docs/user/ppl/cmd/patterns.rst +++ /dev/null @@ -1,225 +0,0 @@ -======== -patterns -======== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``patterns`` command extracts log patterns from a text field and appends the results to the search result. Grouping logs by their patterns makes it easier to aggregate stats from large volumes of log data for analysis and troubleshooting. - -| ``patterns`` command allows users to select different log parsing algorithms to get high log pattern grouping accuracy. Two pattern methods are supported: ``simple_pattern`` and ``brain``. - -| ``simple_pattern`` algorithm is basically a regex parsing method vs ``brain`` algorithm is an automatic log grouping algorithm with high grouping accuracy and keeps semantic meaning. - -| ``patterns`` command supports two modes: ``label`` and ``aggregation``. ``label`` mode returns individual pattern labels. ``aggregation`` mode returns aggregated results on target field. - -| Calcite engine by default labels the variables with '<*>' placeholder. If ``show_numbered_token`` option is turned on, Calcite engine's ``label`` mode not only labels pattern of text but also labels variable tokens in map. In ``aggregation`` mode, it will also output labeled pattern as well as variable tokens per pattern. The variable placeholder is in the format of '' instead of '<*>'. - -Syntax -====== -patterns [by byClause...] [method=simple_pattern | brain] [mode=label | aggregation] [max_sample_count=integer] [buffer_limit=integer] [show_numbered_token=boolean] [new_field=] (algorithm parameters...) - -* field: mandatory. The text field to analyze for patterns. -* byClause: optional. Fields or scalar functions used to group logs for labeling/aggregation. -* method: optional. Algorithm choice: ``simple_pattern`` or ``brain``. **Default:** ``simple_pattern``. -* mode: optional. Output mode: ``label`` or ``aggregation``. **Default:** ``label``. -* max_sample_count: optional. Max sample logs returned per pattern in aggregation mode. **Default:** 10. -* buffer_limit: optional. Safeguard parameter for ``brain`` algorithm to limit internal temporary buffer size (min: 50,000). **Default:** 100,000. -* show_numbered_token: optional. The flag to turn on numbered token output format. **Default:** false. -* new_field: optional. Alias of the output pattern field. **Default:** "patterns_field". -* algorithm parameters: optional. Algorithm-specific tuning: - - * ``simple_pattern``: Define regex via "pattern". 
- * ``brain``: Adjust sensitivity with variable_count_threshold and frequency_threshold_percentage. - - * ``variable_count_threshold``: optional integer. Words are split by space. Algorithm counts how many distinct words are at specific position in initial log groups. Adjusting this threshold can determine the sensitivity of constant words. **Default:** 5. - * ``frequency_threshold_percentage``: optional double. Brain's log pattern is selected based on longest word combination. This sets the lower bound of frequency to ignore low frequency words. **Default:** 0.3. - -Change the default pattern method -================================= -To override default pattern parameters, users can run following command - -.. code-block:: - - PUT _cluster/settings - { - "persistent": { - "plugins.ppl.pattern.method": "brain", - "plugins.ppl.pattern.mode": "aggregation", - "plugins.ppl.pattern.max.sample.count": 5, - "plugins.ppl.pattern.buffer.limit": 50000, - "plugins.ppl.pattern.show.numbered.token": true - } - } - -Simple Pattern Example 1: Create the new field -============================================== - -This example shows how to extract patterns in ``email`` for each document. Parsing a null field will return an empty string. - -PPL query:: - - os> source=accounts | patterns email method=simple_pattern | fields email, patterns_field ; - fetched rows / total rows = 4/4 - +-----------------------+----------------+ - | email | patterns_field | - |-----------------------+----------------| - | amberduke@pyrami.com | <*>@<*>.<*> | - | hattiebond@netagy.com | <*>@<*>.<*> | - | null | | - | daleadams@boink.com | <*>@<*>.<*> | - +-----------------------+----------------+ - -Simple Pattern Example 2: Extract log patterns -============================================== - -This example shows how to extract patterns from a raw log field using the default patterns. 
- -PPL query:: - - os> source=apache | patterns message method=simple_pattern | fields message, patterns_field ; - fetched rows / total rows = 4/4 - +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ - | message | patterns_field | - |-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------| - | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>-<*>/<*> <*>/<*>.<*>" <*> <*> | - | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>/<*>/<*> <*>/<*>.<*>" <*> <*> | - | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>-<*>-<*>-<*> <*>/<*>.<*>" <*> <*> | - | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*> <*>/<*>.<*>" <*> <*> | - +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------+ - -Simple Pattern Example 3: Extract log patterns with custom regex pattern -======================================================================== - -This example shows how to extract patterns from a raw log field using user defined patterns. 
- -PPL query:: - - os> source=apache | patterns message method=simple_pattern new_field='no_numbers' pattern='[0-9]' | fields message, no_numbers ; - fetched rows / total rows = 4/4 - +-----------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | message | no_numbers | - |-----------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*><*><*>.<*><*>.<*>.<*><*> - upton<*><*><*><*> [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "HEAD /e-business/mindshare HTTP/<*>.<*>" <*><*><*> <*><*><*><*><*> | - | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*><*><*>.<*><*>.<*><*><*>.<*> - pouros<*><*><*><*> [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "GET /architectures/convergence/niches/mindshare HTTP/<*>.<*>" <*><*><*> <*><*><*><*><*> | - | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*><*><*>.<*><*><*>.<*><*><*>.<*><*><*> - - [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "PATCH /strategize/out-of-the-box HTTP/<*>.<*>" <*><*><*> <*><*><*><*><*> | - | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*><*><*>.<*><*><*>.<*><*>.<*><*><*> - - [<*><*>/Sep/<*><*><*><*>:<*><*>:<*><*>:<*><*> -<*><*><*><*>] "POST /users HTTP/<*>.<*>" <*><*><*> <*><*><*><*> | - +-----------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Simple Pattern Example 4: Return log patterns aggregation result -================================================================ - -This example shows how to get aggregated results from a raw log field. 
- -PPL query:: - - os> source=apache | patterns message method=simple_pattern mode=aggregation | fields patterns_field, pattern_count, sample_logs ; - fetched rows / total rows = 4/4 - +---------------------------------------------------------------------------------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------+ - | patterns_field | pattern_count | sample_logs | - |---------------------------------------------------------------------------------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------| - | <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*> <*>/<*>.<*>" <*> <*> | 1 | [210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481] | - | <*>.<*>.<*>.<*> - - [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>-<*>-<*>-<*> <*>/<*>.<*>" <*> <*> | 1 | [118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439] | - | <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>-<*>/<*> <*>/<*>.<*>" <*> <*> | 1 | [177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927] | - | <*>.<*>.<*>.<*> - <*> [<*>/<*>/<*>:<*>:<*>:<*> -<*>] "<*> /<*>/<*>/<*>/<*> <*>/<*>.<*>" <*> <*> | 1 | [127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722] | - +---------------------------------------------------------------------------------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------+ - -Simple Pattern Example 5: Return log patterns aggregation result with detected variable tokens -============================================================================================== - -This example shows how to get aggregated results with detected variable tokens. - -Configuration -------------- -With option ``show_numbered_token`` enabled, the output can detect numbered variable tokens from the pattern field. 
- -PPL query:: - - os> source=apache | patterns message method=simple_pattern mode=aggregation show_numbered_token=true | fields patterns_field, pattern_count, tokens | head 1 ; - fetched rows / total rows = 1/1 - +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | patterns_field | pattern_count | tokens | - |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | ... - - [//::: -] " / /." | 1 | {'': ['HTTP'], '': ['users'], '': ['1'], '': ['1'], '': ['9481'], '': ['301'], '': ['28'], '': ['104'], '': ['2022'], '': ['Sep'], '': ['15'], '': ['10'], '': ['57'], '': ['210'], '': ['POST'], '': ['15'], '': ['0700'], '': ['204']} | - +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Brain Example 1: Extract log patterns -===================================== - -This example shows how to extract semantic meaningful log patterns from a raw log field using the brain algorithm. The default variable count threshold is 5. 
- -PPL query:: - - os> source=apache | patterns message method=brain | fields message, patterns_field ; - fetched rows / total rows = 4/4 - +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+ - | message | patterns_field | - |-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------| - | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] "HEAD /e-business/mindshare HTTP/<*>" 404 <*> | - | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] "GET /architectures/convergence/niches/mindshare HTTP/<*>" 100 <*> | - | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*IP*> - - [<*>/Sep/<*>:<*>:<*>:<*> <*>] "PATCH /strategize/out-of-the-box HTTP/<*>" 401 <*> | - | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*IP*> - - [<*>/Sep/<*>:<*>:<*>:<*> <*>] "POST /users HTTP/<*>" 301 <*> | - +-----------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+ - -Brain Example 2: Extract log patterns with custom parameters -============================================================ - -This example shows how to extract semantic meaningful log patterns from a raw log field using custom parameters of the brain algorithm. 
- -PPL query:: - - os> source=apache | patterns message method=brain variable_count_threshold=2 | fields message, patterns_field ; - fetched rows / total rows = 4/4 - +-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------+ - | message | patterns_field | - |-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------| - | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | - | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | - | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | - | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | - +-----------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------+ - -Brain Example 3: Return log patterns aggregation result -======================================================= - -This example shows how to get aggregated results from a raw log field using the brain algorithm. - -PPL query:: - - os> source=apache | patterns message method=brain mode=aggregation variable_count_threshold=2 | fields patterns_field, pattern_count, sample_logs ; - fetched rows / total rows = 1/1 - +----------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | patterns_field | pattern_count | sample_logs | - |----------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | <*IP*> - <*> [<*>/Sep/<*>:<*>:<*>:<*> <*>] <*> <*> HTTP/<*>" <*> <*> | 4 | [177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927,127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722,118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439,210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481] | - 
+----------------------------------------------------------------------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Brain Example 4: Return log patterns aggregation result with detected variable tokens -===================================================================================== - -This example shows how to get aggregated results with detected variable tokens using the brain algorithm. - -Configuration -------------- -With option ``show_numbered_token`` enabled, the output can detect numbered variable tokens from the pattern field. - -PPL query:: - - os> source=apache | patterns message method=brain mode=aggregation show_numbered_token=true variable_count_threshold=2 | fields patterns_field, pattern_count, tokens ; - fetched rows / total rows = 1/1 - +----------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | patterns_field | pattern_count | tokens | - |----------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| - | - [/Sep/::: ] HTTP/" | 4 | {'': ['19927', '28722', '27439', '9481'], '': ['10', '10', '10', '10'], '': ['2022', '2022', '2022', '2022'], '': ['57', '57', '57', '57'], '': ['15', '15', '15', '15'], '': ['"HEAD', '"GET', '"PATCH', '"POST'], '': ['-0700', '-0700', '-0700', '-0700'], '': ['/e-business/mindshare', '/architectures/convergence/niches/mindshare', '/strategize/out-of-the-box', '/users'], '': ['177.95.8.74', '127.45.152.6', '118.223.210.10... 
| 
- +----------------------------------------------------------------------------------------------------------------------------------------+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-
-Limitations
-===========
-
-- Patterns command is not pushed down to OpenSearch data node for now. It will only group log patterns on log messages returned to coordinator node.
diff --git a/docs/user/ppl/cmd/rare.md b/docs/user/ppl/cmd/rare.md
new file mode 100644
index 00000000000..6ee51c9f96a
--- /dev/null
+++ b/docs/user/ppl/cmd/rare.md
@@ -0,0 +1,146 @@
# rare

## Description

The `rare` command finds the least common tuple of values of all fields in the field list.

**Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields.

## Syntax

rare [rare-options] \<field-list\> [by-clause]
* field-list: mandatory. Comma-delimited list of field names.
* by-clause: optional. One or more fields to group the results by.
* rare-options: optional. Options for the rare command. Supported syntax is [countfield=\<string\>] [showcount=\<bool\>].
* showcount=\<bool\>: optional. Whether to create a field in the output that represents a count of the tuple of values. **Default:** `true`.
* countfield=\<string\>: optional. The name of the field that contains the count. **Default:** `'count'`.
* usenull=\<bool\>: optional. Whether to output the null value. **Default:** Determined by `plugins.ppl.syntax.legacy.preferred`:
  * When `plugins.ppl.syntax.legacy.preferred=true`, `usenull` defaults to `true`
  * When `plugins.ppl.syntax.legacy.preferred=false`, `usenull` defaults to `false`

## Example 1: Find the least common values in a field

This example shows how to find the least common gender of all the accounts.

```ppl
source=accounts
| rare showcount=false gender
```

Expected output:

```text
fetched rows / total rows = 2/2
+--------+
| gender |
|--------|
| F      |
| M      |
+--------+
```

## Example 2: Find the least common values organized by gender

This example shows how to find the least common age of all the accounts grouped by gender.

```ppl
source=accounts
| rare showcount=false age by gender
```

Expected output:

```text
fetched rows / total rows = 4/4
+--------+-----+
| gender | age |
|--------+-----|
| F      | 28  |
| M      | 32  |
| M      | 33  |
| M      | 36  |
+--------+-----+
```

## Example 3: Rare command

This example shows how to find the least common gender of all the accounts, including the default count field in the output.

```ppl
source=accounts
| rare gender
```

Expected output:

```text
fetched rows / total rows = 2/2
+--------+-------+
| gender | count |
|--------+-------|
| F      | 1     |
| M      | 3     |
+--------+-------+
```

## Example 4: Specify the count field option

This example shows how to specify the count field.
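The rare-options described above can also be combined in a single invocation; a minimal, untested sketch under the documented grammar (same `accounts` data assumed):

```ppl
source=accounts
| rare countfield='cnt' showcount=true gender
```

Renaming just the count field looks like this: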
+ +```ppl +source=accounts +| rare countfield='cnt' gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------+-----+ +| gender | cnt | +|--------+-----| +| F | 1 | +| M | 3 | ++--------+-----+ +``` + +## Example 5: Specify the usenull field option + +```ppl +source=accounts +| rare usenull=false email +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-----------------------+-------+ +| email | count | +|-----------------------+-------| +| amberduke@pyrami.com | 1 | +| daleadams@boink.com | 1 | +| hattiebond@netagy.com | 1 | ++-----------------------+-------+ +``` + +```ppl +source=accounts +| rare usenull=true email +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+-------+ +| email | count | +|-----------------------+-------| +| null | 1 | +| amberduke@pyrami.com | 1 | +| daleadams@boink.com | 1 | +| hattiebond@netagy.com | 1 | ++-----------------------+-------+ +``` + +## Limitations + +The `rare` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. \ No newline at end of file diff --git a/docs/user/ppl/cmd/rare.rst b/docs/user/ppl/cmd/rare.rst deleted file mode 100644 index e72c8c8c2c9..00000000000 --- a/docs/user/ppl/cmd/rare.rst +++ /dev/null @@ -1,132 +0,0 @@ -==== -rare -==== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``rare`` command finds the least common tuple of values of all fields in the field list. - -| **Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields. - -Syntax -====== -rare [rare-options] [by-clause] - -* field-list: mandatory. Comma-delimited list of field names. -* by-clause: optional. One or more fields to group the results by. -* rare-options: optional. Options for the rare command. Supported syntax is [countfield=] [showcount=]. -* showcount=: optional. Whether to create a field in output that represent a count of the tuple of values. **Default:** ``true``. -* countfield=: optional. The name of the field that contains count. **Default:** ``'count'``. -* usenull=: optional. whether to output the null value. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``: - - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` - -Example 1: Find the least common values in a field -================================================== - -This example shows how to find the least common gender of all the accounts. - -PPL query:: - - os> source=accounts | rare showcount=false gender; - fetched rows / total rows = 2/2 - +--------+ - | gender | - |--------| - | F | - | M | - +--------+ - - -Example 2: Find the least common values organized by gender -=========================================================== - -This example shows how to find the least common age of all the accounts grouped by gender. - -PPL query:: - - os> source=accounts | rare showcount=false age by gender; - fetched rows / total rows = 4/4 - +--------+-----+ - | gender | age | - |--------+-----| - | F | 28 | - | M | 32 | - | M | 33 | - | M | 36 | - +--------+-----+ - -Example 3: Rare command -======================= - -This example shows how to find the least common gender of all the accounts. 
-
-PPL query::
-
-    os> source=accounts | rare gender;
-    fetched rows / total rows = 2/2
-    +--------+-------+
-    | gender | count |
-    |--------+-------|
-    | F      | 1     |
-    | M      | 3     |
-    +--------+-------+
-
-
-Example 4: Specify the count field option
-=========================================
-
-This example shows how to specify the count field.
-
-PPL query::
-
-    os> source=accounts | rare countfield='cnt' gender;
-    fetched rows / total rows = 2/2
-    +--------+-----+
-    | gender | cnt |
-    |--------+-----|
-    | F      | 1   |
-    | M      | 3   |
-    +--------+-----+
-
-
-Example 5: Specify the usenull field option
-===========================================
-
-PPL query::
-
-    os> source=accounts | rare usenull=false email;
-    fetched rows / total rows = 3/3
-    +-----------------------+-------+
-    | email                 | count |
-    |-----------------------+-------|
-    | amberduke@pyrami.com  | 1     |
-    | daleadams@boink.com   | 1     |
-    | hattiebond@netagy.com | 1     |
-    +-----------------------+-------+
-
-PPL query::
-
-    os> source=accounts | rare usenull=true email;
-    fetched rows / total rows = 4/4
-    +-----------------------+-------+
-    | email                 | count |
-    |-----------------------+-------|
-    | null                  | 1     |
-    | amberduke@pyrami.com  | 1     |
-    | daleadams@boink.com   | 1     |
-    | hattiebond@netagy.com | 1     |
-    +-----------------------+-------+
-
-
-Limitations
-===========
-The ``rare`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.
diff --git a/docs/user/ppl/cmd/regex.md b/docs/user/ppl/cmd/regex.md
new file mode 100644
index 00000000000..d108b635abc
--- /dev/null
+++ b/docs/user/ppl/cmd/regex.md
@@ -0,0 +1,155 @@
# regex

## Description

The `regex` command filters search results by matching field values against a regular expression pattern. Only documents where the specified field matches the pattern are included in the results.

## Syntax

regex \<field\> = \<pattern\>
regex \<field\> != \<pattern\>
* field: mandatory. The field name to match against.
* pattern: mandatory string. The regular expression pattern to match. Supports Java regex syntax including named groups, lookahead/lookbehind, and character classes.
* `=`: operator for positive matching (include matches)
* `!=`: operator for negative matching (exclude matches)

## Regular Expression Engine

The regex command uses Java's built-in regular expression engine, which supports:
* **Standard regex features**: Character classes, quantifiers, anchors
* **Named capture groups**: `(?<name>pattern)` syntax
* **Lookahead/lookbehind**: `(?=...)` and `(?<=...)` assertions
* **Inline flags**: Case-insensitive `(?i)`, multiline `(?m)`, dotall `(?s)`, and other modes

For complete documentation of Java regex patterns and available modes, see the [Java Pattern documentation](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html).

## Example 1: Basic pattern matching

This example shows how to filter documents where the `lastname` field matches names starting with uppercase letters.

```ppl
source=accounts
| regex lastname="^[A-Z][a-z]+$"
| fields account_number, firstname, lastname
```

Expected output:

```text
fetched rows / total rows = 4/4
+----------------+-----------+----------+
| account_number | firstname | lastname |
|----------------+-----------+----------|
| 1              | Amber     | Duke     |
| 6              | Hattie    | Bond     |
| 13             | Nanette   | Bates    |
| 18             | Dale      | Adams    |
+----------------+-----------+----------+
```

## Example 2: Negative matching

This example shows how to exclude documents where the `lastname` field ends with "son".
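The inline flags listed above can be embedded directly in the pattern; for instance, a minimal, untested sketch of a case-insensitive variant of Example 1 (same `accounts` data assumed):

```ppl
source=accounts
| regex lastname="(?i)^[a-z]+$"
| fields account_number, lastname
```

Excluding matches with the `!=` operator works as follows: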
+ +```ppl +source=accounts +| regex lastname!=".*son$" +| fields account_number, lastname +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+----------+ +| account_number | lastname | +|----------------+----------| +| 1 | Duke | +| 6 | Bond | +| 13 | Bates | +| 18 | Adams | ++----------------+----------+ +``` + +## Example 3: Email domain matching + +This example shows how to filter documents by email domain patterns. + +```ppl +source=accounts +| regex email="@pyrami\.com$" +| fields account_number, email +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+----------------------+ +| account_number | email | +|----------------+----------------------| +| 1 | amberduke@pyrami.com | ++----------------+----------------------+ +``` + +## Example 4: Complex patterns with character classes + +This example shows how to use complex regex patterns with character classes and quantifiers. + +```ppl +source=accounts | regex address="\\d{3,4}\\s+[A-Z][a-z]+\\s+(Street|Lane|Court)" | fields account_number, address +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+----------------------+ +| account_number | address | +|----------------+----------------------| +| 1 | 880 Holmes Lane | +| 6 | 671 Bristol Street | +| 13 | 789 Madison Street | +| 18 | 467 Hutchinson Court | ++----------------+----------------------+ +``` + +## Example 5: Case-sensitive matching + +This example demonstrates that regex matching is case-sensitive by default. + +```ppl +source=accounts +| regex state="va" +| fields account_number, state +``` + +Expected output: + +```text +fetched rows / total rows = 0/0 ++----------------+-------+ +| account_number | state | +|----------------+-------| ++----------------+-------+ +``` + +```ppl +source=accounts +| regex state="VA" +| fields account_number, state +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+-------+ +| account_number | state | +|----------------+-------| +| 13 | VA | ++----------------+-------+ +``` + +## Limitations + +* **Field specification required**: A field name must be specified in the regex command. Pattern-only syntax (e.g., `regex "pattern"`) is not currently supported +* **String fields only**: The regex command currently only supports string fields. Using it on numeric or boolean fields will result in an error \ No newline at end of file diff --git a/docs/user/ppl/cmd/regex.rst b/docs/user/ppl/cmd/regex.rst deleted file mode 100644 index 154949ba133..00000000000 --- a/docs/user/ppl/cmd/regex.rst +++ /dev/null @@ -1,140 +0,0 @@ -===== -regex -===== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``regex`` command filters search results by matching field values against a regular expression pattern. Only documents where the specified field matches the pattern are included in the results. - -Syntax -====== -regex = -regex != - -* field: mandatory. The field name to match against. -* pattern: mandatory string. The regular expression pattern to match. Supports Java regex syntax including named groups, lookahead/lookbehind, and character classes. 
-* = : operator for positive matching (include matches) -* != : operator for negative matching (exclude matches) - -Regular Expression Engine -========================= - -The regex command uses Java's built-in regular expression engine, which supports: - -* **Standard regex features**: Character classes, quantifiers, anchors -* **Named capture groups**: ``(?pattern)`` syntax -* **Lookahead/lookbehind**: ``(?=...)`` and ``(?<=...)`` assertions -* **Inline flags**: Case-insensitive ``(?i)``, multiline ``(?m)``, dotall ``(?s)``, and other modes - -For complete documentation of Java regex patterns and available modes, see the `Java Pattern documentation `_. - -Example 1: Basic pattern matching -================================= - -This example shows how to filter documents where the ``lastname`` field matches names starting with uppercase letters. - -PPL query:: - - os> source=accounts | regex lastname="^[A-Z][a-z]+$" | fields account_number, firstname, lastname; - fetched rows / total rows = 4/4 - +----------------+-----------+----------+ - | account_number | firstname | lastname | - |----------------+-----------+----------| - | 1 | Amber | Duke | - | 6 | Hattie | Bond | - | 13 | Nanette | Bates | - | 18 | Dale | Adams | - +----------------+-----------+----------+ - - -Example 2: Negative matching -============================ - -This example shows how to exclude documents where the ``lastname`` field ends with "son". - -PPL query:: - - os> source=accounts | regex lastname!=".*son$" | fields account_number, lastname; - fetched rows / total rows = 4/4 - +----------------+----------+ - | account_number | lastname | - |----------------+----------| - | 1 | Duke | - | 6 | Bond | - | 13 | Bates | - | 18 | Adams | - +----------------+----------+ - - -Example 3: Email domain matching -================================ - -This example shows how to filter documents by email domain patterns. - -PPL query:: - - os> source=accounts | regex email="@pyrami\.com$" | fields account_number, email; - fetched rows / total rows = 1/1 - +----------------+----------------------+ - | account_number | email | - |----------------+----------------------| - | 1 | amberduke@pyrami.com | - +----------------+----------------------+ - - -Example 4: Complex patterns with character classes -================================================== - -This example shows how to use complex regex patterns with character classes and quantifiers. - -PPL query:: - - os> source=accounts | regex address="\d{3,4}\s+[A-Z][a-z]+\s+(Street|Lane|Court)" | fields account_number, address; - fetched rows / total rows = 4/4 - +----------------+----------------------+ - | account_number | address | - |----------------+----------------------| - | 1 | 880 Holmes Lane | - | 6 | 671 Bristol Street | - | 13 | 789 Madison Street | - | 18 | 467 Hutchinson Court | - +----------------+----------------------+ - - -Example 5: Case-sensitive matching -================================== - -This example demonstrates that regex matching is case-sensitive by default. 
-
-PPL query::
-
- os> source=accounts | regex state="va" | fields account_number, state;
- fetched rows / total rows = 0/0
- +----------------+-------+
- | account_number | state |
- |----------------+-------|
- +----------------+-------+
-
-PPL query::
-
- os> source=accounts | regex state="VA" | fields account_number, state;
- fetched rows / total rows = 1/1
- +----------------+-------+
- | account_number | state |
- |----------------+-------|
- | 13 | VA |
- +----------------+-------+
-
-
-Limitations
-===========
-
-| * **Field specification required**: A field name must be specified in the regex command. Pattern-only syntax (e.g., ``regex "pattern"``) is not currently supported
-| * **String fields only**: The regex command currently only supports string fields. Using it on numeric or boolean fields will result in an error
diff --git a/docs/user/ppl/cmd/rename.md b/docs/user/ppl/cmd/rename.md
new file mode 100644
index 00000000000..346513f232e
--- /dev/null
+++ b/docs/user/ppl/cmd/rename.md
@@ -0,0 +1,142 @@
+# rename

## Description

The `rename` command renames one or more fields in the search result.
## Syntax

rename \<source-field\> AS \<target-field\>["," \<source-field\> AS \<target-field\>]...
* source-field: mandatory. The name of the field you want to rename. Supports wildcard patterns using `*`.
* target-field: mandatory. The name you want to rename to. Must have same number of wildcards as the source.

## Behavior

The rename command handles non-existent fields as follows:
* **Renaming a non-existent field to a non-existent field**: No change occurs to the result set.
* **Renaming a non-existent field to an existing field**: The existing target field is removed from the result set.
* **Renaming an existing field to an existing field**: The existing target field is removed and the source field is renamed to the target.

## Example 1: Rename one field

This example shows how to rename one field.

```ppl
source=accounts
| rename account_number as an
| fields an
```

Expected output:

```text
fetched rows / total rows = 4/4
+----+
| an |
|----|
| 1 |
| 6 |
| 13 |
| 18 |
+----+
```

## Example 2: Rename multiple fields

This example shows how to rename multiple fields.

```ppl
source=accounts
| rename account_number as an, employer as emp
| fields an, emp
```

Expected output:

```text
fetched rows / total rows = 4/4
+----+---------+
| an | emp |
|----+---------|
| 1 | Pyrami |
| 6 | Netagy |
| 13 | Quility |
| 18 | null |
+----+---------+
```

## Example 3: Rename with wildcards

This example shows how to rename multiple fields using wildcard patterns.

```ppl
source=accounts
| rename *name as *_name
| fields first_name, last_name
```

Expected output:

```text
fetched rows / total rows = 4/4
+------------+-----------+
| first_name | last_name |
|------------+-----------|
| Amber | Duke |
| Hattie | Bond |
| Nanette | Bates |
| Dale | Adams |
+------------+-----------+
```

## Example 4: Rename with multiple wildcard patterns

This example shows how to rename multiple fields using multiple wildcard patterns.
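Each comma-separated pair is applied in order, so the combined command below should behave like two chained `rename` commands (a sketch under that assumption, using the same `accounts` index):

```ppl
source=accounts
| rename *name as *_name
| rename *_number as *number
| fields first_name, last_name, accountnumber
```

The single-command form is: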
+ +```ppl +source=accounts +| rename *name as *_name, *_number as *number +| fields first_name, last_name, accountnumber +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++------------+-----------+---------------+ +| first_name | last_name | accountnumber | +|------------+-----------+---------------| +| Amber | Duke | 1 | +| Hattie | Bond | 6 | +| Nanette | Bates | 13 | +| Dale | Adams | 18 | ++------------+-----------+---------------+ +``` + +## Example 5: Rename existing field to existing field + +This example shows how to rename an existing field to an existing field. The target field gets removed and the source field is renamed to the target field. + +```ppl +source=accounts +| rename firstname as age +| fields age +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------+ +| age | +|---------| +| Amber | +| Hattie | +| Nanette | +| Dale | ++---------+ +``` + +## Limitations + +The `rename` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. +Literal asterisk (*) characters in field names cannot be replaced as asterisk is used for wildcard matching. \ No newline at end of file diff --git a/docs/user/ppl/cmd/rename.rst b/docs/user/ppl/cmd/rename.rst deleted file mode 100644 index eb92a45b8cb..00000000000 --- a/docs/user/ppl/cmd/rename.rst +++ /dev/null @@ -1,130 +0,0 @@ -====== -rename -====== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``rename`` command renames one or more fields in the search result. - -Syntax -====== -rename AS ["," AS ]... - -* source-field: mandatory. The name of the field you want to rename. Supports wildcard patterns using ``*``. -* target-field: mandatory. The name you want to rename to. Must have same number of wildcards as the source. - -Behavior -======== - -The rename command handles non-existent fields as follows: - -* **Renaming a non-existent field to a non-existent field**: No change occurs to the result set. -* **Renaming a non-existent field to an existing field**: The existing target field is removed from the result set. -* **Renaming an existing field to an existing field**: The existing target field is removed and the source field is renamed to the target. - -Example 1: Rename one field -=========================== - -This example shows how to rename one field. - -PPL query:: - - os> source=accounts | rename account_number as an | fields an; - fetched rows / total rows = 4/4 - +----+ - | an | - |----| - | 1 | - | 6 | - | 13 | - | 18 | - +----+ - - -Example 2: Rename multiple fields -================================= - -This example shows how to rename multiple fields. - -PPL query:: - - os> source=accounts | rename account_number as an, employer as emp | fields an, emp; - fetched rows / total rows = 4/4 - +----+---------+ - | an | emp | - |----+---------| - | 1 | Pyrami | - | 6 | Netagy | - | 13 | Quility | - | 18 | null | - +----+---------+ - - -Example 3: Rename with wildcards -================================ - -This example shows how to rename multiple fields using wildcard patterns. 
-
-PPL query::
-
- os> source=accounts | rename *name as *_name | fields first_name, last_name;
- fetched rows / total rows = 4/4
- +------------+-----------+
- | first_name | last_name |
- |------------+-----------|
- | Amber | Duke |
- | Hattie | Bond |
- | Nanette | Bates |
- | Dale | Adams |
- +------------+-----------+
-
-
-Example 4: Rename with multiple wildcard patterns
-=================================================
-
-This example shows how to rename multiple fields using multiple wildcard patterns.
-
-PPL query::
-
- os> source=accounts | rename *name as *_name, *_number as *number | fields first_name, last_name, accountnumber;
- fetched rows / total rows = 4/4
- +------------+-----------+---------------+
- | first_name | last_name | accountnumber |
- |------------+-----------+---------------|
- | Amber | Duke | 1 |
- | Hattie | Bond | 6 |
- | Nanette | Bates | 13 |
- | Dale | Adams | 18 |
- +------------+-----------+---------------+
-
-Example 5: Rename existing field to existing field
-==================================================
-
-This example shows how to rename an existing field to an existing field. The target field gets removed and the source field is renamed to the target field.
-
-
-PPL query::
-
- os> source=accounts | rename firstname as age | fields age;
- fetched rows / total rows = 4/4
- +---------+
- | age |
- |---------|
- | Amber |
- | Hattie |
- | Nanette |
- | Dale |
- +---------+
-
-
-Limitations
-===========
-| The ``rename`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node.
-| Literal asterisk (*) characters in field names cannot be replaced as asterisk is used for wildcard matching.
diff --git a/docs/user/ppl/cmd/replace.md b/docs/user/ppl/cmd/replace.md
new file mode 100644
index 00000000000..2333f46b3b2
--- /dev/null
+++ b/docs/user/ppl/cmd/replace.md
@@ -0,0 +1,330 @@
+# replace

## Description

The `replace` command replaces text in one or more fields in the search result. Supports literal string replacement and wildcard patterns using `*`.
## Syntax

replace '\<pattern\>' WITH '\<replacement\>' [, '\<pattern\>' WITH '\<replacement\>']... IN \<field-name\>[, \<field-name\>]...
* pattern: mandatory. The text pattern you want to replace.
* replacement: mandatory. The text you want to replace with.
* field-name: mandatory. One or more field names where the replacement should occur.

## Example 1: Replace text in one field

This example shows replacing text in one field.

```ppl
source=accounts
| replace "IL" WITH "Illinois" IN state
| fields state
```

Expected output:

```text
fetched rows / total rows = 4/4
+----------+
| state |
|----------|
| Illinois |
| TN |
| VA |
| MD |
+----------+
```

## Example 2: Replace text in multiple fields

This example shows replacing text in multiple fields.

```ppl
source=accounts
| replace "IL" WITH "Illinois" IN state, address
| fields state, address
```

Expected output:

```text
fetched rows / total rows = 4/4
+----------+----------------------+
| state | address |
|----------+----------------------|
| Illinois | 880 Holmes Lane |
| TN | 671 Bristol Street |
| VA | 789 Madison Street |
| MD | 467 Hutchinson Court |
+----------+----------------------+
```

## Example 3: Replace with other commands in a pipeline

This example shows using replace with other commands in a query pipeline.
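Because `replace` only rewrites the listed fields, it can usually sit on either side of a filter that does not depend on the replaced values; a sketch that filters first (assuming the same `accounts` index) should return the same rows:

```ppl
source=accounts
| where age > 30
| replace "IL" WITH "Illinois" IN state
| fields state, age
```

The example query, which replaces before filtering, is: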
+ +```ppl +source=accounts +| replace "IL" WITH "Illinois" IN state +| where age > 30 +| fields state, age +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------+-----+ +| state | age | +|----------+-----| +| Illinois | 32 | +| TN | 36 | +| MD | 33 | ++----------+-----+ +``` + +## Example 4: Replace with multiple pattern/replacement pairs + +This example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially. + +```ppl +source=accounts +| replace "IL" WITH "Illinois", "TN" WITH "Tennessee" IN state +| fields state +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+ +| state | +|-----------| +| Illinois | +| Tennessee | +| VA | +| MD | ++-----------+ +``` + +## Example 5: Pattern matching with LIKE and replace + +Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. + +```ppl +source=accounts +| where LIKE(address, '%Holmes%') +| replace "Holmes" WITH "HOLMES" IN address +| fields address, state, gender, age, city +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------------+-------+--------+-----+--------+ +| address | state | gender | age | city | +|-----------------+-------+--------+-----+--------| +| 880 HOLMES Lane | IL | M | 32 | Brogan | ++-----------------+-------+--------+-----+--------+ +``` + +## Example 6: Wildcard suffix match + +Replace values that end with a specific pattern. The wildcard `*` matches any prefix. + +```ppl +source=accounts +| replace "*IL" WITH "Illinois" IN state +| fields state +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------+ +| state | +|----------| +| Illinois | +| TN | +| VA | +| MD | ++----------+ +``` + +## Example 7: Wildcard prefix match + +Replace values that start with a specific pattern. The wildcard `*` matches any suffix. + +```ppl +source=accounts +| replace "IL*" WITH "Illinois" IN state +| fields state +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------+ +| state | +|----------| +| Illinois | +| TN | +| VA | +| MD | ++----------+ +``` + +## Example 8: Wildcard capture and substitution + +Use wildcards in both pattern and replacement to capture and reuse matched portions. The number of wildcards must match in pattern and replacement. + +```ppl +source=accounts +| replace "* Lane" WITH "Lane *" IN address +| fields address +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------------+ +| address | +|----------------------| +| Lane 880 Holmes | +| 671 Bristol Street | +| 789 Madison Street | +| 467 Hutchinson Court | ++----------------------+ +``` + +## Example 9: Multiple wildcards for pattern transformation + +Use multiple wildcards to transform patterns. Each wildcard in the replacement substitutes the corresponding captured value. + +```ppl +source=accounts +| replace "* *" WITH "*_*" IN address +| fields address +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------------+ +| address | +|----------------------| +| 880_Holmes Lane | +| 671_Bristol Street | +| 789_Madison Street | +| 467_Hutchinson Court | ++----------------------+ +``` + +## Example 10: Wildcard with zero wildcards in replacement + +When replacement has zero wildcards, all matching values are replaced with the literal replacement string. 
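For contrast, keeping wildcards in the replacement preserves the captured text; a hypothetical `replace "*IL*" WITH "US-*IL*" IN state` would rewrite `IL` to `US-IL` rather than overwrite the whole value. The zero-wildcard form used in this example is: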
+ +```ppl +source=accounts +| replace "*IL*" WITH "Illinois" IN state +| fields state +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------+ +| state | +|----------| +| Illinois | +| TN | +| VA | +| MD | ++----------+ +``` + +## Example 11: Matching literal asterisks + +Use `\*` to match literal asterisk characters (`\*` = literal asterisk, `\\` = literal backslash). + +```ppl +source=accounts +| eval note = 'price: *sale*' +| replace 'price: \*sale\*' WITH 'DISCOUNTED' IN note +| fields note +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++------------+ +| note | +|------------| +| DISCOUNTED | +| DISCOUNTED | +| DISCOUNTED | +| DISCOUNTED | ++------------+ +``` + +## Example 12: Wildcard with no replacement wildcards + +Use wildcards in pattern but none in replacement to create a fixed output. + +```ppl +source=accounts +| eval test = 'prefix-value-suffix' +| replace 'prefix-*-suffix' WITH 'MATCHED' IN test +| fields test +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------+ +| test | +|---------| +| MATCHED | +| MATCHED | +| MATCHED | +| MATCHED | ++---------+ +``` + +## Example 13: Escaped asterisks with wildcards + +Combine escaped asterisks (literal) with wildcards for complex patterns. + +```ppl +source=accounts +| eval label = 'file123.txt' +| replace 'file*.*' WITH '\**.*' IN label +| fields label +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------+ +| label | +|----------| +| *123.txt | +| *123.txt | +| *123.txt | +| *123.txt | ++----------+ +``` + +## Limitations + +* Wildcards: `*` matches zero or more characters (case-sensitive) +* Replacement wildcards must match pattern wildcard count, or be zero +* Escape sequences: `\*` (literal asterisk), `\\` (literal backslash) \ No newline at end of file diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst deleted file mode 100644 index 60a28bc8ce0..00000000000 --- a/docs/user/ppl/cmd/replace.rst +++ /dev/null @@ -1,268 +0,0 @@ -======= -replace -======= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -The ``replace`` replaces text in one or more fields in the search result. Supports literal string replacement and wildcard patterns using ``*``. - - -Syntax -====== -replace '' WITH '' [, '' WITH '']... IN [, ]... - -* pattern: mandatory. The text pattern you want to replace. -* replacement: mandatory. The text you want to replace with. -* field-name: mandatory. One or more field names where the replacement should occur. - -Example 1: Replace text in one field -==================================== - -This example shows replacing text in one field. - -PPL query:: - - os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state; - fetched rows / total rows = 4/4 - +----------+ - | state | - |----------| - | Illinois | - | TN | - | VA | - | MD | - +----------+ - - -Example 2: Replace text in multiple fields -========================================== - -This example shows replacing text in multiple fields. 
- -PPL query:: - - os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address; - fetched rows / total rows = 4/4 - +----------+----------------------+ - | state | address | - |----------+----------------------| - | Illinois | 880 Holmes Lane | - | TN | 671 Bristol Street | - | VA | 789 Madison Street | - | MD | 467 Hutchinson Court | - +----------+----------------------+ - - -Example 3: Replace with other commands in a pipeline -==================================================== - -This example shows using replace with other commands in a query pipeline. - -PPL query:: - - os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age; - fetched rows / total rows = 3/3 - +----------+-----+ - | state | age | - |----------+-----| - | Illinois | 32 | - | TN | 36 | - | MD | 33 | - +----------+-----+ - -Example 4: Replace with multiple pattern/replacement pairs -========================================================== - -This example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially. - -PPL query:: - - os> source=accounts | replace "IL" WITH "Illinois", "TN" WITH "Tennessee" IN state | fields state; - fetched rows / total rows = 4/4 - +-----------+ - | state | - |-----------| - | Illinois | - | Tennessee | - | VA | - | MD | - +-----------+ - -Example 5: Pattern matching with LIKE and replace -================================================= - -Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. - -PPL query:: - - os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city; - fetched rows / total rows = 1/1 - +-----------------+-------+--------+-----+--------+ - | address | state | gender | age | city | - |-----------------+-------+--------+-----+--------| - | 880 HOLMES Lane | IL | M | 32 | Brogan | - +-----------------+-------+--------+-----+--------+ - - -Example 6: Wildcard suffix match ---------------------------------- - -Replace values that end with a specific pattern. The wildcard ``*`` matches any prefix. - -PPL query:: - - os> source=accounts | replace "*IL" WITH "Illinois" IN state | fields state; - fetched rows / total rows = 4/4 - +----------+ - | state | - |----------| - | Illinois | - | TN | - | VA | - | MD | - +----------+ - - -Example 7: Wildcard prefix match ---------------------------------- - -Replace values that start with a specific pattern. The wildcard ``*`` matches any suffix. - -PPL query:: - - os> source=accounts | replace "IL*" WITH "Illinois" IN state | fields state; - fetched rows / total rows = 4/4 - +----------+ - | state | - |----------| - | Illinois | - | TN | - | VA | - | MD | - +----------+ - - -Example 8: Wildcard capture and substitution ---------------------------------------------- - -Use wildcards in both pattern and replacement to capture and reuse matched portions. The number of wildcards must match in pattern and replacement. 
- -PPL query:: - - os> source=accounts | replace "* Lane" WITH "Lane *" IN address | fields address; - fetched rows / total rows = 4/4 - +----------------------+ - | address | - |----------------------| - | Lane 880 Holmes | - | 671 Bristol Street | - | 789 Madison Street | - | 467 Hutchinson Court | - +----------------------+ - - -Example 9: Multiple wildcards for pattern transformation ---------------------------------------------------------- - -Use multiple wildcards to transform patterns. Each wildcard in the replacement substitutes the corresponding captured value. - -PPL query:: - - os> source=accounts | replace "* *" WITH "*_*" IN address | fields address; - fetched rows / total rows = 4/4 - +----------------------+ - | address | - |----------------------| - | 880_Holmes Lane | - | 671_Bristol Street | - | 789_Madison Street | - | 467_Hutchinson Court | - +----------------------+ - - -Example 10: Wildcard with zero wildcards in replacement --------------------------------------------------------- - -When replacement has zero wildcards, all matching values are replaced with the literal replacement string. - -PPL query:: - - os> source=accounts | replace "*IL*" WITH "Illinois" IN state | fields state; - fetched rows / total rows = 4/4 - +----------+ - | state | - |----------| - | Illinois | - | TN | - | VA | - | MD | - +----------+ - - -Example 11: Matching literal asterisks ---------------------------------------- - -Use ``\*`` to match literal asterisk characters (``\*`` = literal asterisk, ``\\`` = literal backslash). - -PPL query:: - - os> source=accounts | eval note = 'price: *sale*' | replace 'price: \*sale\*' WITH 'DISCOUNTED' IN note | fields note; - fetched rows / total rows = 4/4 - +------------+ - | note | - |------------| - | DISCOUNTED | - | DISCOUNTED | - | DISCOUNTED | - | DISCOUNTED | - +------------+ - -Example 12: Wildcard with no replacement wildcards ----------------------------------------------------- - -Use wildcards in pattern but none in replacement to create a fixed output. - -PPL query:: - - os> source=accounts | eval test = 'prefix-value-suffix' | replace 'prefix-*-suffix' WITH 'MATCHED' IN test | fields test; - fetched rows / total rows = 4/4 - +---------+ - | test | - |---------| - | MATCHED | - | MATCHED | - | MATCHED | - | MATCHED | - +---------+ - -Example 13: Escaped asterisks with wildcards ---------------------------------------------- - -Combine escaped asterisks (literal) with wildcards for complex patterns. - -PPL query:: - - os> source=accounts | eval label = 'file123.txt' | replace 'file*.*' WITH '\**.*' IN label | fields label; - fetched rows / total rows = 4/4 - +----------+ - | label | - |----------| - | *123.txt | - | *123.txt | - | *123.txt | - | *123.txt | - +----------+ - - -Limitations -=========== -* Wildcards: ``*`` matches zero or more characters (case-sensitive) -* Replacement wildcards must match pattern wildcard count, or be zero -* Escape sequences: ``\*`` (literal asterisk), ``\\`` (literal backslash) \ No newline at end of file diff --git a/docs/user/ppl/cmd/reverse.md b/docs/user/ppl/cmd/reverse.md new file mode 100644 index 00000000000..f63a8f18e95 --- /dev/null +++ b/docs/user/ppl/cmd/reverse.md @@ -0,0 +1,134 @@ +# reverse + +## Description + +The `reverse` command reverses the display order of search results. The same results are returned, but in reverse order. +## Syntax + +reverse +* No parameters: The reverse command takes no arguments or options. + +## Note + +The `reverse` command processes the entire dataset. 
If applied directly to millions of records, it will consume significant memory resources on the coordinating node. Users should only apply the `reverse` command to smaller datasets, typically after aggregation operations. +## Example 1: Basic reverse operation + +This example shows reversing the order of all documents. + +```ppl +source=accounts +| fields account_number, age +| reverse +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----+ +| account_number | age | +|----------------+-----| +| 6 | 36 | +| 18 | 33 | +| 1 | 32 | +| 13 | 28 | ++----------------+-----+ +``` + +## Example 2: Reverse with sort + +This example shows reversing results after sorting by age in ascending order, effectively giving descending order. + +```ppl +source=accounts +| sort age +| fields account_number, age +| reverse +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----+ +| account_number | age | +|----------------+-----| +| 6 | 36 | +| 18 | 33 | +| 1 | 32 | +| 13 | 28 | ++----------------+-----+ +``` + +## Example 3: Reverse with head + +This example shows using reverse with head to get the last 2 records from the original order. + +```ppl +source=accounts +| reverse +| head 2 +| fields account_number, age +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------+-----+ +| account_number | age | +|----------------+-----| +| 6 | 36 | +| 18 | 33 | ++----------------+-----+ +``` + +## Example 4: Double reverse + +This example shows that applying reverse twice returns to the original order. + +```ppl +source=accounts +| reverse +| reverse +| fields account_number, age +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----+ +| account_number | age | +|----------------+-----| +| 13 | 28 | +| 1 | 32 | +| 18 | 33 | +| 6 | 36 | ++----------------+-----+ +``` + +## Example 5: Reverse with complex pipeline + +This example shows reverse working with filtering and field selection. + +```ppl +source=accounts +| where age > 30 +| fields account_number, age +| reverse +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+-----+ +| account_number | age | +|----------------+-----| +| 6 | 36 | +| 18 | 33 | +| 1 | 32 | ++----------------+-----+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/cmd/reverse.rst b/docs/user/ppl/cmd/reverse.rst deleted file mode 100644 index d839a687bf9..00000000000 --- a/docs/user/ppl/cmd/reverse.rst +++ /dev/null @@ -1,115 +0,0 @@ -======= -reverse -======= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``reverse`` command reverses the display order of search results. The same results are returned, but in reverse order. - -Syntax -====== -reverse - -* No parameters: The reverse command takes no arguments or options. - -Note -==== -| The `reverse` command processes the entire dataset. If applied directly to millions of records, it will consume significant memory resources on the coordinating node. Users should only apply the `reverse` command to smaller datasets, typically after aggregation operations. - -Example 1: Basic reverse operation -================================== - -This example shows reversing the order of all documents. 
-
-PPL query::
-
- os> source=accounts | fields account_number, age | reverse;
- fetched rows / total rows = 4/4
- +----------------+-----+
- | account_number | age |
- |----------------+-----|
- | 6 | 36 |
- | 18 | 33 |
- | 1 | 32 |
- | 13 | 28 |
- +----------------+-----+
-
-
-Example 2: Reverse with sort
-============================
-
-This example shows reversing results after sorting by age in ascending order, effectively giving descending order.
-
-PPL query::
-
- os> source=accounts | sort age | fields account_number, age | reverse;
- fetched rows / total rows = 4/4
- +----------------+-----+
- | account_number | age |
- |----------------+-----|
- | 6 | 36 |
- | 18 | 33 |
- | 1 | 32 |
- | 13 | 28 |
- +----------------+-----+
-
-
-Example 3: Reverse with head
-============================
-
-This example shows using reverse with head to get the last 2 records from the original order.
-
-PPL query::
-
- os> source=accounts | reverse | head 2 | fields account_number, age;
- fetched rows / total rows = 2/2
- +----------------+-----+
- | account_number | age |
- |----------------+-----|
- | 6 | 36 |
- | 18 | 33 |
- +----------------+-----+
-
-
-Example 4: Double reverse
-=========================
-
-This example shows that applying reverse twice returns to the original order.
-
-PPL query::
-
- os> source=accounts | reverse | reverse | fields account_number, age;
- fetched rows / total rows = 4/4
- +----------------+-----+
- | account_number | age |
- |----------------+-----|
- | 13 | 28 |
- | 1 | 32 |
- | 18 | 33 |
- | 6 | 36 |
- +----------------+-----+
-
-
-Example 5: Reverse with complex pipeline
-========================================
-
-This example shows reverse working with filtering and field selection.
-
-PPL query::
-
- os> source=accounts | where age > 30 | fields account_number, age | reverse;
- fetched rows / total rows = 3/3
- +----------------+-----+
- | account_number | age |
- |----------------+-----|
- | 6 | 36 |
- | 18 | 33 |
- | 1 | 32 |
- +----------------+-----+
diff --git a/docs/user/ppl/cmd/rex.md b/docs/user/ppl/cmd/rex.md
new file mode 100644
index 00000000000..0f117373d8e
--- /dev/null
+++ b/docs/user/ppl/cmd/rex.md
@@ -0,0 +1,291 @@
+# rex

## Description

The `rex` command extracts fields from a raw text field using regular expression named capture groups.
## Syntax

rex [mode=\<mode\>] field=\<field\> \<pattern\> [max_match=\<int\>] [offset_field=\<string\>]
* field: mandatory. The field must be a string field to extract data from.
* pattern: mandatory string. The regular expression pattern with named capture groups used to extract new fields. Pattern must contain at least one named capture group using `(?<name>pattern)` syntax.
* mode: optional. Either `extract` or `sed`. **Default:** extract
  * **extract mode** (default): Creates new fields from regular expression named capture groups. This is the standard field extraction behavior.
  * **sed mode**: Performs text substitution on the field using sed-style patterns
    * `s/pattern/replacement/` - Replace first occurrence
    * `s/pattern/replacement/g` - Replace all occurrences (global)
    * `s/pattern/replacement/n` - Replace only the nth occurrence (where n is a number)
    * `y/from_chars/to_chars/` - Character-by-character transliteration
    * Backreferences: `\1`, `\2`, etc. reference captured groups in replacement
* max_match: optional integer (default=1). Maximum number of matches to extract. If greater than 1, extracted fields become arrays.
The value 0 means unlimited matches, but is automatically capped to the configured limit (default: 10, configurable via `plugins.ppl.rex.max_match.limit`).
* offset_field: optional string. Field name to store the character offset positions of matches. Only available in extract mode.

## Example 1: Basic Field Extraction

This example shows extracting username and domain from email addresses using named capture groups. Both extracted fields are returned as string type.

```ppl
source=accounts
| rex field=email "(?<username>[^@]+)@(?<domain>[^.]+)"
| fields email, username, domain
| head 2
```

Expected output:

```text
fetched rows / total rows = 2/2
+-----------------------+------------+--------+
| email | username | domain |
|-----------------------+------------+--------|
| amberduke@pyrami.com | amberduke | pyrami |
| hattiebond@netagy.com | hattiebond | netagy |
+-----------------------+------------+--------+
```

## Example 2: Handling Non-matching Patterns

This example shows the rex command returning all events, setting extracted fields to null for non-matching patterns. Extracted fields would be string type when matches are found.

```ppl
source=accounts
| rex field=email "(?<user>[^@]+)@(?<domain>gmail\\.com)"
| fields email, user, domain
| head 2
```

Expected output:

```text
fetched rows / total rows = 2/2
+-----------------------+------+--------+
| email | user | domain |
|-----------------------+------+--------|
| amberduke@pyrami.com | null | null |
| hattiebond@netagy.com | null | null |
+-----------------------+------+--------+
```

## Example 3: Multiple Matches with max_match

This example shows extracting multiple words from address field using max_match parameter. The extracted field is returned as an array type containing string elements.

```ppl
source=accounts
| rex field=address "(?<words>[A-Za-z]+)" max_match=2
| fields address, words
| head 3
```

Expected output:

```text
fetched rows / total rows = 3/3
+--------------------+------------------+
| address | words |
|--------------------+------------------|
| 880 Holmes Lane | [Holmes,Lane] |
| 671 Bristol Street | [Bristol,Street] |
| 789 Madison Street | [Madison,Street] |
+--------------------+------------------+
```

## Example 4: Text Replacement with mode=sed

This example shows replacing email domains using sed mode for text substitution. The extracted field is returned as string type.

```ppl
source=accounts
| rex field=email mode=sed "s/@.*/@company.com/"
| fields email
| head 2
```

Expected output:

```text
fetched rows / total rows = 2/2
+------------------------+
| email |
|------------------------|
| amberduke@company.com |
| hattiebond@company.com |
+------------------------+
```

## Example 5: Using offset_field

This example shows tracking the character positions where matches occur. Extracted fields are string type, and the offset_field is also string type.
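Based on the output below, offsets appear to be reported per capture group as zero-based, inclusive start-end character positions, joined with `&`. The query is: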
```ppl
source=accounts
| rex field=email "(?<username>[^@]+)@(?<domain>[^.]+)" offset_field=matchpos
| fields email, username, domain, matchpos
| head 2
```

Expected output:

```text
fetched rows / total rows = 2/2
+-----------------------+------------+--------+---------------------------+
| email | username | domain | matchpos |
|-----------------------+------------+--------+---------------------------|
| amberduke@pyrami.com | amberduke | pyrami | domain=10-15&username=0-8 |
| hattiebond@netagy.com | hattiebond | netagy | domain=11-16&username=0-9 |
+-----------------------+------------+--------+---------------------------+
```

## Example 6: Complex Email Pattern

This example shows extracting comprehensive email components including top-level domain. All extracted fields are returned as string type.

```ppl
source=accounts
| rex field=email "(?<user>[a-zA-Z0-9._%+-]+)@(?<domain>[a-zA-Z0-9.-]+)\\.(?<tld>[a-zA-Z]{2,})"
| fields email, user, domain, tld
| head 2
```

Expected output:

```text
fetched rows / total rows = 2/2
+-----------------------+------------+--------+-----+
| email | user | domain | tld |
|-----------------------+------------+--------+-----|
| amberduke@pyrami.com | amberduke | pyrami | com |
| hattiebond@netagy.com | hattiebond | netagy | com |
+-----------------------+------------+--------+-----+
```

## Example 7: Chaining Multiple rex Commands

This example shows extracting initial letters from both first and last names. All extracted fields are returned as string type.

```ppl
source=accounts
| rex field=firstname "(?<firstinitial>^.)"
| rex field=lastname "(?<lastinitial>^.)"
| fields firstname, lastname, firstinitial, lastinitial
| head 3
```

Expected output:

```text
fetched rows / total rows = 3/3
+-----------+----------+--------------+-------------+
| firstname | lastname | firstinitial | lastinitial |
|-----------+----------+--------------+-------------|
| Amber | Duke | A | D |
| Hattie | Bond | H | B |
| Nanette | Bates | N | B |
+-----------+----------+--------------+-------------+
```

## Example 8: Named Capture Group Limitations

This example demonstrates naming restrictions for capture groups. Group names cannot contain underscores due to Java regex limitations.
Invalid PPL query with underscores

```ppl
source=accounts
| rex field=email "(?<user_name>[^@]+)@(?<email_domain>[^.]+)"
| fields email, user_name, email_domain
```

Expected output:

```text
{'reason': 'Invalid Query', 'details': "Invalid capture group name 'user_name'. Java regex group names must start with a letter and contain only letters and digits.", 'type': 'IllegalArgumentException'}
Error: Query returned no data
```

Correct PPL query without underscores

```ppl
source=accounts
| rex field=email "(?<username>[^@]+)@(?<emaildomain>[^.]+)"
| fields email, username, emaildomain
| head 2
```

Expected output:

```text
fetched rows / total rows = 2/2
+-----------------------+------------+-------------+
| email | username | emaildomain |
|-----------------------+------------+-------------|
| amberduke@pyrami.com | amberduke | pyrami |
| hattiebond@netagy.com | hattiebond | netagy |
+-----------------------+------------+-------------+
```

## Example 9: Max Match Limit Protection

This example demonstrates the max_match limit protection mechanism. When max_match=0 (unlimited) is specified, the system automatically caps it to prevent memory exhaustion.
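If you administer the cluster, the cap can be raised or lowered through the standard cluster settings API (a sketch; the value shown is arbitrary):

```text
PUT /_cluster/settings
{
  "transient": {
    "plugins.ppl.rex.max_match.limit": 20
  }
}
```

With the default limit in place, the capping behavior looks like this: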
PPL query with max_match=0 automatically capped to default limit of 10

```ppl
source=accounts
| rex field=address "(?<digit>\\d*)" max_match=0
| eval digit_count=array_length(digit)
| fields address, digit_count
| head 1
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------+-------------+
| address | digit_count |
|-----------------+-------------|
| 880 Holmes Lane | 10 |
+-----------------+-------------+
```

PPL query exceeding the configured limit results in an error

```ppl
source=accounts
| rex field=address "(?<digit>\\d*)" max_match=100
| fields address, digit
| head 1
```

Expected output:

```text
{'reason': 'Invalid Query', 'details': 'Rex command max_match value (100) exceeds the configured limit (10). Consider using a smaller max_match value or adjust the plugins.ppl.rex.max_match.limit setting.', 'type': 'IllegalArgumentException'}
Error: Query returned no data
```

## Comparison with Related Commands

| Feature | rex | parse |
| --- | --- | --- |
| Pattern Type | Java Regex | Java Regex |
| Named Groups Required | Yes | Yes |
| Multiple Named Groups | Yes | No |
| Multiple Matches | Yes | No |
| Text Substitution | Yes | No |
| Offset Tracking | Yes | No |
| Special Characters in Group Names | No | No |

## Limitations

**Named Capture Group Naming:**
* Group names must start with a letter and contain only letters and digits
* For detailed Java regex pattern syntax and usage, refer to the [official Java Pattern documentation](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html)

**Pattern Requirements:**
* Pattern must contain at least one named capture group
* Regular capture groups `(...)` without names are not allowed

**Max Match Limit:**
* The `max_match` parameter is subject to a configurable system limit to prevent memory exhaustion
* When `max_match=0` (unlimited) is specified, it is automatically capped at the configured limit (default: 10)
* User-specified values exceeding the configured limit will result in an error
* Users can adjust the limit via the `plugins.ppl.rex.max_match.limit` cluster setting. Setting this limit to a large value is not recommended as it can lead to excessive memory consumption, especially with patterns that match empty strings (e.g., `\d*`, `\w*`)
diff --git a/docs/user/ppl/cmd/rex.rst b/docs/user/ppl/cmd/rex.rst
deleted file mode 100644
index 3dec7f26c42..00000000000
--- a/docs/user/ppl/cmd/rex.rst
+++ /dev/null
@@ -1,235 +0,0 @@
-===
-rex
-===
-
-.. rubric:: Table of contents
-
-.. contents::
- :local:
- :depth: 2
-
-
-Description
-===========
-| The ``rex`` command extracts fields from a raw text field using regular expression named capture groups.
-
-Syntax
-======
-rex [mode=<mode>] field=<field> <pattern> [max_match=<int>] [offset_field=<string>]
-
-* field: mandatory. The field must be a string field to extract data from.
-* pattern: mandatory string. The regular expression pattern with named capture groups used to extract new fields. Pattern must contain at least one named capture group using ``(?<name>pattern)`` syntax.
-* mode: optional. Either ``extract`` or ``sed``. **Default:** extract
-
- * **extract mode** (default): Creates new fields from regular expression named capture groups. This is the standard field extraction behavior.
- * **sed mode**: Performs text substitution on the field using sed-style patterns - - * ``s/pattern/replacement/`` - Replace first occurrence - * ``s/pattern/replacement/g`` - Replace all occurrences (global) - * ``s/pattern/replacement/n`` - Replace only the nth occurrence (where n is a number) - * ``y/from_chars/to_chars/`` - Character-by-character transliteration - * Backreferences: ``\1``, ``\2``, etc. reference captured groups in replacement - -* max_match: optional integer (default=1). Maximum number of matches to extract. If greater than 1, extracted fields become arrays. The value 0 means unlimited matches, but is automatically capped to the configured limit (default: 10, configurable via ``plugins.ppl.rex.max_match.limit``). -* offset_field: optional string. Field name to store the character offset positions of matches. Only available in extract mode. - -Example 1: Basic Field Extraction -================================= - -This example shows extracting username and domain from email addresses using named capture groups. Both extracted fields are returned as string type. - -PPL query:: - - os> source=accounts | rex field=email "(?[^@]+)@(?[^.]+)" | fields email, username, domain | head 2 ; - fetched rows / total rows = 2/2 - +-----------------------+------------+--------+ - | email | username | domain | - |-----------------------+------------+--------| - | amberduke@pyrami.com | amberduke | pyrami | - | hattiebond@netagy.com | hattiebond | netagy | - +-----------------------+------------+--------+ - - -Example 2: Handling Non-matching Patterns -========================================= - -This example shows the rex command returning all events, setting extracted fields to null for non-matching patterns. Extracted fields would be string type when matches are found. - -PPL query:: - - os> source=accounts | rex field=email "(?[^@]+)@(?gmail\\.com)" | fields email, user, domain | head 2 ; - fetched rows / total rows = 2/2 - +-----------------------+------+--------+ - | email | user | domain | - |-----------------------+------+--------| - | amberduke@pyrami.com | null | null | - | hattiebond@netagy.com | null | null | - +-----------------------+------+--------+ - - -Example 3: Multiple Matches with max_match -========================================== - -This example shows extracting multiple words from address field using max_match parameter. The extracted field is returned as an array type containing string elements. - -PPL query:: - - os> source=accounts | rex field=address "(?[A-Za-z]+)" max_match=2 | fields address, words | head 3 ; - fetched rows / total rows = 3/3 - +--------------------+------------------+ - | address | words | - |--------------------+------------------| - | 880 Holmes Lane | [Holmes,Lane] | - | 671 Bristol Street | [Bristol,Street] | - | 789 Madison Street | [Madison,Street] | - +--------------------+------------------+ - - -Example 4: Text Replacement with mode=sed -========================================= - -This example shows replacing email domains using sed mode for text substitution. The extracted field is returned as string type. 
- -PPL query:: - - os> source=accounts | rex field=email mode=sed "s/@.*/@company.com/" | fields email | head 2 ; - fetched rows / total rows = 2/2 - +------------------------+ - | email | - |------------------------| - | amberduke@company.com | - | hattiebond@company.com | - +------------------------+ - - -Example 5: Using offset_field -============================= - -This example shows tracking the character positions where matches occur. Extracted fields are string type, and the offset_field is also string type. - -PPL query:: - - os> source=accounts | rex field=email "(?[^@]+)@(?[^.]+)" offset_field=matchpos | fields email, username, domain, matchpos | head 2 ; - fetched rows / total rows = 2/2 - +-----------------------+------------+--------+---------------------------+ - | email | username | domain | matchpos | - |-----------------------+------------+--------+---------------------------| - | amberduke@pyrami.com | amberduke | pyrami | domain=10-15&username=0-8 | - | hattiebond@netagy.com | hattiebond | netagy | domain=11-16&username=0-9 | - +-----------------------+------------+--------+---------------------------+ - - -Example 6: Complex Email Pattern -================================ - -This example shows extracting comprehensive email components including top-level domain. All extracted fields are returned as string type. - -PPL query:: - - os> source=accounts | rex field=email "(?[a-zA-Z0-9._%+-]+)@(?[a-zA-Z0-9.-]+)\\.(?[a-zA-Z]{2,})" | fields email, user, domain, tld | head 2 ; - fetched rows / total rows = 2/2 - +-----------------------+------------+--------+-----+ - | email | user | domain | tld | - |-----------------------+------------+--------+-----| - | amberduke@pyrami.com | amberduke | pyrami | com | - | hattiebond@netagy.com | hattiebond | netagy | com | - +-----------------------+------------+--------+-----+ - - -Example 7: Chaining Multiple rex Commands -========================================= - -This example shows extracting initial letters from both first and last names. All extracted fields are returned as string type. - -PPL query:: - - os> source=accounts | rex field=firstname "(?^.)" | rex field=lastname "(?^.)" | fields firstname, lastname, firstinitial, lastinitial | head 3 ; - fetched rows / total rows = 3/3 - +-----------+----------+--------------+-------------+ - | firstname | lastname | firstinitial | lastinitial | - |-----------+----------+--------------+-------------| - | Amber | Duke | A | D | - | Hattie | Bond | H | B | - | Nanette | Bates | N | B | - +-----------+----------+--------------+-------------+ - - -Example 8: Named Capture Group Limitations -========================================== - -This example demonstrates naming restrictions for capture groups. Group names cannot contain underscores due to Java regex limitations. - -Invalid PPL query with underscores:: - - os> source=accounts | rex field=email "(?[^@]+)@(?[^.]+)" | fields email, user_name, email_domain ; - {'reason': 'Invalid Query', 'details': "Invalid capture group name 'user_name'. 
Java regex group names must start with a letter and contain only letters and digits.", 'type': 'IllegalArgumentException'} - Error: Query returned no data - -Correct PPL query without underscores:: - - os> source=accounts | rex field=email "(?[^@]+)@(?[^.]+)" | fields email, username, emaildomain | head 2 ; - fetched rows / total rows = 2/2 - +-----------------------+------------+-------------+ - | email | username | emaildomain | - |-----------------------+------------+-------------| - | amberduke@pyrami.com | amberduke | pyrami | - | hattiebond@netagy.com | hattiebond | netagy | - +-----------------------+------------+-------------+ - - -Example 9: Max Match Limit Protection -===================================== - -This example demonstrates the max_match limit protection mechanism. When max_match=0 (unlimited) is specified, the system automatically caps it to prevent memory exhaustion. - -PPL query with max_match=0 automatically capped to default limit of 10:: - - os> source=accounts | rex field=address "(?\\d*)" max_match=0 | eval digit_count=array_length(digit) | fields address, digit_count | head 1 ; - fetched rows / total rows = 1/1 - +-----------------+-------------+ - | address | digit_count | - |-----------------+-------------| - | 880 Holmes Lane | 10 | - +-----------------+-------------+ - -PPL query exceeding the configured limit results in an error:: - - os> source=accounts | rex field=address "(?\\d*)" max_match=100 | fields address, digit | head 1 ; - {'reason': 'Invalid Query', 'details': 'Rex command max_match value (100) exceeds the configured limit (10). Consider using a smaller max_match value or adjust the plugins.ppl.rex.max_match.limit setting.', 'type': 'IllegalArgumentException'} - Error: Query returned no data - - -Comparison with Related Commands -================================ - -================================== ============ ============ -Feature rex parse -================================== ============ ============ -Pattern Type Java Regex Java Regex -Named Groups Required Yes Yes -Multiple Named Groups Yes No -Multiple Matches Yes No -Text Substitution Yes No -Offset Tracking Yes No -Special Characters in Group Names No No -================================== ============ ============ - - -Limitations -=========== -**Named Capture Group Naming:** - -* Group names must start with a letter and contain only letters and digits -* For detailed Java regex pattern syntax and usage, refer to the `official Java Pattern documentation `_ - -**Pattern Requirements:** - -* Pattern must contain at least one named capture group -* Regular capture groups ``(...)`` without names are not allowed - -**Max Match Limit:** - -* The ``max_match`` parameter is subject to a configurable system limit to prevent memory exhaustion -* When ``max_match=0`` (unlimited) is specified, it is automatically capped at the configured limit (default: 10) -* User-specified values exceeding the configured limit will result in an error -* Users can adjust the limit via the ``plugins.ppl.rex.max_match.limit`` cluster setting. 
Setting this limit to a large value is not recommended as it can lead to excessive memory consumption, especially with patterns that match empty strings (e.g., ``\d*``, ``\w*``)
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/search.md b/docs/user/ppl/cmd/search.md
new file mode 100644
index 00000000000..f05f47aa196
--- /dev/null
+++ b/docs/user/ppl/cmd/search.md
@@ -0,0 +1,745 @@
+# search

## Description

The `search` command retrieves documents from the index. The `search` command can only be used as the first command in the PPL query.
## Syntax

search source=[\<cluster\>:]\<index\> [search-expression]
* search: the search keyword, which can be omitted.
* index: mandatory. search command must specify which index to query from. The index name can be prefixed by "\<cluster\>:" for cross-cluster search.
* search-expression: optional. Search expression that gets converted to OpenSearch [query_string](https://docs.opensearch.org/latest/query-dsl/full-text/query-string/) function which uses [Lucene Query Syntax](https://lucene.apache.org/core/2_9_4/queryparsersyntax.html).

## Search Expression

The search expression syntax supports:
* **Full text search**: `error` or `"error message"` - Searches the default field configured by the `index.query.default_field` setting (defaults to `*` which searches all fields)
* **Field-value comparisons**: `field=value`, `field!=value`, `field>value`, `field>=value`, `field<value`, `field<=value`
* **Time modifiers**: `earliest=<time>` and `latest=<time>`, where a relative `<time>` takes the form `[+|-]<time_amount>[@<snap_unit>]` - Time offset from current time

**Relative Time Components**:
* **Time offset**: `+` (future) or `-` (past)
* **Time amount**: Numeric value + time unit (`second`, `minute`, `hour`, `day`, `week`, `month`, `year`, and their variants)
* **Snap to unit**: Optional `@` to round to nearest unit (hour, day, month, etc.)

**Examples of Time Modifier Values**:
* `earliest=now` - From current time
* `latest='2024-12-31 23:59:59'` - Until a specific date
* `earliest=-7d` - From 7 days ago
* `latest='+1d@d'` - Until tomorrow at start of day
* `earliest='-1month@month'` - From start of previous month
* `latest=1754020061` - Until a unix timestamp (August 1, 2025 03:47:41 at UTC)

Read more details on time modifiers in the [PPL relative_timestamp documentation](https://github.com/opensearch-project/opensearch-spark/blob/main/docs/ppl-lang/functions/ppl-datetime.md#relative_timestamp).
**Notes:**
* **Column name conflicts**: If your data contains columns named "earliest" or "latest", use backticks to access them as regular fields (e.g., `` `earliest`="value"``) to avoid conflicts with time modifier syntax.
* **Time snap syntax**: Time modifiers with chained time offsets must be wrapped in quotes (e.g., `latest='+1d@month-10h'`) for proper query parsing.

## Default Field Configuration

When you search without specifying a field, it searches the default field configured by the `index.query.default_field` index setting (defaults to `*` which searches all fields).
You can check or modify the default field setting:

    GET /accounts/_settings/index.query.default_field

    PUT /accounts/_settings
    {
      "index.query.default_field": "firstname,lastname,email"
    }

## Field Types and Search Behavior

**Text Fields**: Full-text search, phrase search
* `search message="error occurred" source=logs`
* Limitations: Wildcards apply to terms after analysis, not entire field value.
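When a whole-value wildcard is needed on analyzed text, one option is to query a keyword sub-field instead (a sketch, assuming a `message.keyword` sub-field exists in the mapping):

```ppl
search message.keyword="Payment*" source=logs
| fields message
```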
+ +**Keyword Fields**: Exact matching, wildcard patterns +* `search status="ACTIVE" source=logs` +* Limitations: No text analysis, case-sensitive matching + +**Numeric Fields**: Range queries, exact matching, IN operator +* `search age>=18 AND balance<50000 source=accounts` +* Limitations: No wildcard or text search support + +**Date Fields**: Range queries, exact matching, IN operator +* `search timestamp>="2024-01-01" source=logs` +* Limitations: Must use index mapping date format, no wildcards + +**Boolean Fields**: true/false values only, exact matching, IN operator +* `search active=true source=users` +* Limitations: No wildcards or range queries + +**IP Fields**: Exact matching, CIDR notation +* `search client_ip="192.168.1.0/24" source=logs` +* Limitations: No wildcards for partial IP matching. For wildcard search use multi field with keyword: `search ip_address.keyword='1*' source=logs` or WHERE clause: `source=logs | where cast(ip_address as string) like '1%'` + +**Field Type Performance Tips**: + * Each field type has specific search capabilities and limitations. Using the wrong field type during ingestion impacts performance and accuracy + * For wildcard searches on non-keyword fields: Add a keyword field copy for better performance. Example: If you need wildcards on a text field, create `message.keyword` alongside `message` + +## Cross-Cluster Search + +Cross-cluster search lets any node in a cluster execute search requests against other clusters. Refer to [Cross-Cluster Search](../admin/cross_cluster_search.md) for configuration. +## Example 1: Text Search + +**Basic Text Search** (unquoted single term) + +```ppl +search ERROR source=otellogs +| sort @timestamp +| fields severityText, body +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+---------------------------------------------------------+ +| severityText | body | +|--------------+---------------------------------------------------------| +| ERROR | Payment failed: Insufficient funds for user@example.com | ++--------------+---------------------------------------------------------+ +``` + +**Phrase Search** (requires quotes for multi-word exact match) + +```ppl +search "Payment failed" source=otellogs +| fields body +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------------------------------------------+ +| body | +|---------------------------------------------------------| +| Payment failed: Insufficient funds for user@example.com | ++---------------------------------------------------------+ +``` + +**Implicit AND with Multiple Terms** (unquoted literals are combined with AND) + +```ppl +search user email source=otellogs +| sort @timestamp +| fields body +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------------------------------------------------------------------------------------------------+ +| body | +|--------------------------------------------------------------------------------------------------------------------| +| Executing SQL: SELECT * FROM users WHERE email LIKE '%@gmail.com' AND status != 'deleted' ORDER BY created_at DESC | ++--------------------------------------------------------------------------------------------------------------------+ +``` + +Note: `search user email` is equivalent to `search user AND email`. Multiple unquoted terms are automatically combined with AND. 
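+By contrast, quoting the same words searches for them as one exact phrase rather than two AND-ed terms. A minimal sketch (illustrative only; this exact phrase may not match any document in the sample data):
+
+```ppl
+search "user email" source=otellogs
+| fields body
+```
+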
+**Enclose in double quotes for terms which contain special characters** + +```ppl +search "john.doe+newsletter@company.com" source=otellogs +| fields body +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------------------------------------------------------------------------------------------------+ +| body | +|--------------------------------------------------------------------------------------------------------------------| +| Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome! Your order #12345 is confirmed' | ++--------------------------------------------------------------------------------------------------------------------+ +``` + +### Mixed Phrase and Boolean + +```ppl +search "User authentication" OR OAuth2 source=otellogs +| sort @timestamp +| fields body +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------------------------------------------------------------------------------------------+ +| body | +|----------------------------------------------------------------------------------------------------------| +| [2024-01-15 10:30:09] production.INFO: User authentication successful for admin@company.org using OAuth2 | ++----------------------------------------------------------------------------------------------------------+ +``` + +## Example 2: Boolean Logic and Operator Precedence + +### Boolean Operators + +```ppl +search severityText="ERROR" OR severityText="FATAL" source=otellogs +| sort @timestamp +| fields severityText +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++--------------+ +| severityText | +|--------------| +| ERROR | +| FATAL | +| ERROR | ++--------------+ +``` + +```ppl +search severityText="INFO" AND `resource.attributes.service.name`="cart-service" source=otellogs +| fields body +| head 1; +``` + +Expected output + +```text +fetched rows / total rows = 1/1 ++----------------------------------------------------------------------------------+ +| body | +|----------------------------------------------------------------------------------| +| User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart | ++----------------------------------------------------------------------------------+ +``` + +**Operator Precedence** (highest to lowest): Parentheses → NOT → OR → AND + +```ppl +search severityText="ERROR" OR severityText="WARN" AND severityNumber>15 source=otellogs +| sort @timestamp +| fields severityText, severityNumber +| head 2 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------------+----------------+ +| severityText | severityNumber | +|--------------+----------------| +| ERROR | 17 | +| ERROR | 17 | ++--------------+----------------+ +``` + +The above evaluates as `(severityText="ERROR" OR severityText="WARN") AND severityNumber>15` +## Example 3: NOT vs != Semantics + +**!= operator** (field must exist and not equal the value) + +```ppl +search employer!="Quility" source=accounts +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | +|----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| +| 1 | Amber | 
880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | +| 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | ++----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +``` + +**NOT operator** (excludes matching conditions, includes null fields) + +```ppl +search NOT employer="Quility" source=accounts +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | +|----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| +| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | +| 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | +| 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | ++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +``` + +**Key difference**: `!=` excludes null values, `NOT` includes them. +Dale Adams (account 18) has `employer=null`. He appears in `NOT employer="Quility"` but not in `employer!="Quility"`. +## Example 4: Wildcards + +### Wildcard Patterns + +```ppl +search severityText=ERR* source=otellogs +| sort @timestamp +| fields severityText +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++--------------+ +| severityText | +|--------------| +| ERROR | +| ERROR | +| ERROR2 | ++--------------+ +``` + +```ppl +search body=user* source=otellogs +| sort @timestamp +| fields body +| head 2; +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------------------------------------------------------------------------+ +| body | +|----------------------------------------------------------------------------------| +| User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart | +| Payment failed: Insufficient funds for user@example.com | ++----------------------------------------------------------------------------------+ +``` + +**Wildcard Rules**: +* `*` - Matches zero or more characters +* `?` - Matches exactly one character + +### Single character wildcard (?) + +```ppl +search severityText="INFO?" source=otellogs +| sort @timestamp +| fields severityText +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++--------------+ +| severityText | +|--------------| +| INFO2 | +| INFO3 | +| INFO4 | ++--------------+ +``` + +## Example 5: Range Queries + +Use comparison operators (>, <, >=, <=) to filter numeric and date fields within specific ranges. Range queries are particularly useful for filtering by age, price, timestamps, or any numeric metrics. 
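+Date fields support the same comparison operators as numeric fields. A minimal sketch (the timestamp values are illustrative and assumed to match the index mapping's date format, as noted in the field types section above):
+
+```ppl
+search @timestamp>='2024-01-15 10:30:00' AND @timestamp<'2024-01-15 10:31:00' source=otellogs
+```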
+ +```ppl +search severityNumber>15 AND severityNumber<=20 source=otellogs +| sort @timestamp +| fields severityNumber +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+ +| severityNumber | +|----------------| +| 17 | +| 17 | +| 18 | ++----------------+ +``` + +```ppl +search `attributes.payment.amount`>=1000.0 AND `attributes.payment.amount`<=2000.0 source=otellogs +| fields body; +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------------------------------------------+ +| body | +|---------------------------------------------------------| +| Payment failed: Insufficient funds for user@example.com | ++---------------------------------------------------------+ +``` + +## Example 6: Field Search with Wildcards + +When searching in text or keyword fields, wildcards enable partial matching. This is particularly useful for finding records where you only know part of the value. Note that wildcards work best with keyword fields, while text fields may produce unexpected results due to tokenization. +**Partial Search in Keyword Fields** + +```ppl +search employer=Py* source=accounts +| fields firstname, employer +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+----------+ +| firstname | employer | +|-----------+----------| +| Amber | Pyrami | ++-----------+----------+ +``` + +### Combining Wildcards with Field Comparisons + +```ppl +search firstname=A* AND age>30 source=accounts +| fields firstname, age, city +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+-----+--------+ +| firstname | age | city | +|-----------+-----+--------| +| Amber | 32 | Brogan | ++-----------+-----+--------+ +``` + +**Important Notes on Wildcard Usage**: +* **Keyword fields**: Best for wildcard searches - exact value matching with pattern support +* **Text fields**: Wildcards apply to individual tokens after analysis, not the entire field value +* **Performance**: Leading wildcards (e.g., `*@example.com`) are slower than trailing wildcards +* **Case sensitivity**: Keyword field wildcards are case-sensitive unless normalized during indexing + +## Example 7: IN Operator and Field Comparisons + +The IN operator efficiently checks if a field matches any value from a list. This is cleaner and more performant than chaining multiple OR conditions for the same field. 
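+For comparison, the IN query shown below is equivalent to this hand-written OR chain (a sketch of the more verbose form that IN replaces):
+
+```ppl
+search severityText="ERROR" OR severityText="WARN" OR severityText="FATAL" source=otellogs
+```
+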
+**IN Operator** + +```ppl +search severityText IN ("ERROR", "WARN", "FATAL") source=otellogs +| sort @timestamp +| fields severityText +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++--------------+ +| severityText | +|--------------| +| ERROR | +| WARN | +| FATAL | ++--------------+ +``` + +### Field Comparison Examples + +```ppl +search severityNumber=17 source=otellogs +| sort @timestamp +| fields body +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------------------------------------------+ +| body | +|---------------------------------------------------------| +| Payment failed: Insufficient funds for user@example.com | ++---------------------------------------------------------+ +``` + +```ppl +search `attributes.user.email`="user@example.com" source=otellogs +| fields body; +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------------------------------------------+ +| body | +|---------------------------------------------------------| +| Payment failed: Insufficient funds for user@example.com | ++---------------------------------------------------------+ +``` + +## Example 8: Complex Expressions + +Combine multiple conditions using boolean operators and parentheses to create sophisticated search queries. + +```ppl +search (severityText="ERROR" OR severityText="WARN") AND severityNumber>10 source=otellogs +| sort @timestamp +| fields severityText +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++--------------+ +| severityText | +|--------------| +| ERROR | +| WARN | +| ERROR | ++--------------+ +``` + +```ppl +search `attributes.user.email`="user@example.com" OR (`attributes.error.code`="INSUFFICIENT_FUNDS" AND severityNumber>15) source=otellogs +| fields body; +``` + +Expected output: + +``` +fetched rows / total rows = 1/1 ++---------------------------------------------------------+ +| body | +|---------------------------------------------------------| +| Payment failed: Insufficient funds for user@example.com | ++---------------------------------------------------------+ +``` + +## Example 9: Time Modifiers + +Time modifiers filter search results by time range using the implicit `@timestamp` field. They support various time formats for precise temporal filtering. 
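+Relative offsets and snapping can also be combined in a single range. A sketch (results depend on the current time, so no fixed output is shown) that selects documents from the start of yesterday up to the start of today:
+
+```ppl
+search earliest='-1d@d' latest='@d' source=otellogs
+```
+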
+**Absolute Time Filtering** + +```ppl +search earliest='2024-01-15 10:30:05' latest='2024-01-15 10:30:10' source=otellogs +| fields @timestamp, severityText +``` + +Expected output: + +```text +fetched rows / total rows = 6/6 ++-------------------------------+--------------+ +| @timestamp | severityText | +|-------------------------------+--------------| +| 2024-01-15 10:30:05.678901234 | FATAL | +| 2024-01-15 10:30:06.789012345 | TRACE | +| 2024-01-15 10:30:07.890123456 | ERROR | +| 2024-01-15 10:30:08.901234567 | WARN | +| 2024-01-15 10:30:09.012345678 | INFO | +| 2024-01-15 10:30:10.123456789 | TRACE2 | ++-------------------------------+--------------+ +``` + +**Relative Time Filtering** (before 30 seconds ago) + +```ppl +search latest=-30s source=otellogs +| sort @timestamp +| fields @timestamp, severityText +| head 3 +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-------------------------------+--------------+ +| @timestamp | severityText | +|-------------------------------+--------------| +| 2024-01-15 10:30:00.123456789 | INFO | +| 2024-01-15 10:30:01.23456789 | ERROR | +| 2024-01-15 10:30:02.345678901 | WARN | ++-------------------------------+--------------+ +``` + +**Time Snapping** (before start of current minute) + +```ppl +search latest='@m' source=otellogs +| fields @timestamp, severityText +| head 2 +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++-------------------------------+--------------+ +| @timestamp | severityText | +|-------------------------------+--------------| +| 2024-01-15 10:30:00.123456789 | INFO | +| 2024-01-15 10:30:01.23456789 | ERROR | ++-------------------------------+--------------+ +``` + +### Unix Timestamp Filtering + +```ppl +search earliest=1705314600 latest=1705314605 source=otellogs +| fields @timestamp, severityText +``` + +Expected output: + +```text +fetched rows / total rows = 5/5 ++-------------------------------+--------------+ +| @timestamp | severityText | +|-------------------------------+--------------| +| 2024-01-15 10:30:00.123456789 | INFO | +| 2024-01-15 10:30:01.23456789 | ERROR | +| 2024-01-15 10:30:02.345678901 | WARN | +| 2024-01-15 10:30:03.456789012 | DEBUG | +| 2024-01-15 10:30:04.567890123 | INFO | ++-------------------------------+--------------+ +``` + +## Example 10: Special Characters and Escaping + +Understand when and how to escape special characters in your search queries. There are two categories of characters that need escaping: +**Characters that must be escaped**: +* **Backslashes (\)**: Always escape as `\\` to search for literal backslash +* **Quotes (")**: Escape as `\"` when inside quoted strings + +**Wildcard characters (escape only to search literally)**: +* **Asterisk (*)**: Use as-is for wildcard, escape as `\\*` to search for literal asterisk +* **Question mark (?)**: Use as-is for wildcard, escape as `\\?` to search for literal question mark + + +| Intent | PPL Syntax | Result | +|--------|------------|--------| +| Wildcard search | `field=user*` | Matches "user", "user123", "userABC" | +| Literal "user*" | `field="user\\*"` | Matches only "user*" | +| Wildcard search | `field=log?` | Matches "log1", "logA", "logs" | +| Literal "log?" | `field="log\\?"` | Matches only "log?" 
| + + +**Backslash in file paths** + +```ppl +search `attributes.error.type`="C:\\\\Users\\\\admin" source=otellogs +| fields `attributes.error.type` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------------------+ +| attributes.error.type | +|-----------------------| +| C:\Users\admin | ++-----------------------+ +``` + +Note: Each backslash in the search value needs to be escaped with another backslash. When using REST API with JSON, additional JSON escaping is required. +**Quotes within strings** + +```ppl +search body="\"exact phrase\"" source=otellogs +| sort @timestamp +| fields body +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| body | +|--------------------------------------------------------------------------------------------------------------------------------------------------------| +| Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT "exact phrase" wildcard* fuzzy~2 /regex/ [range TO search] | ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +**Text with special characters** + +```ppl +search "wildcard\\* fuzzy~2" source=otellogs +| fields body +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +| body | +|--------------------------------------------------------------------------------------------------------------------------------------------------------| +| Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT "exact phrase" wildcard* fuzzy~2 /regex/ [range TO search] | ++--------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +## Example 11: Fetch All Data + +Retrieve all documents from an index by specifying only the source without any search conditions. This is useful for exploring small datasets or verifying data ingestion. 
+ +```ppl +source=accounts +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | +|----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| +| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | +| 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | +| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | +| 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | ++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/cmd/search.rst b/docs/user/ppl/cmd/search.rst deleted file mode 100644 index 31aa28cc46d..00000000000 --- a/docs/user/ppl/cmd/search.rst +++ /dev/null @@ -1,556 +0,0 @@ -====== -search -====== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``search`` command retrieves document from the index. The ``search`` command can only be used as the first command in the PPL query. - - -Syntax -====== -search source=[:] [search-expression] - -* search: search keyword, which could be ignored. -* index: mandatory. search command must specify which index to query from. The index name can be prefixed by ":" for cross-cluster search. -* search-expression: optional. Search expression that gets converted to OpenSearch `query_string `_ function which uses `Lucene Query Syntax `_. - -Search Expression -================= - -The search expression syntax supports: - -* **Full text search**: ``error`` or ``"error message"`` - Searches the default field configured by the ``index.query.default_field`` setting (defaults to ``*`` which searches all fields) -* **Field-value comparisons**: ``field=value``, ``field!=value``, ``field>value``, ``field>=value``, ``field`_: - -1. **Current time**: ``now`` or ``now()`` - the current time -2. **Absolute format**: ``MM/dd/yyyy:HH:mm:ss`` or ``yyyy-MM-dd HH:mm:ss`` -3. **Unix timestamp**: Numeric values (seconds since epoch) like ``1754020060.123`` -4. **Relative format**: ``(+|-)[+<...>]@`` - Time offset from current time - -**Relative Time Components**: - -* **Time offset**: ``+`` (future) or ``-`` (past) -* **Time amount**: Numeric value + time unit (``second``, ``minute``, ``hour``, ``day``, ``week``, ``month``, ``year``, and their variants) -* **Snap to unit**: Optional ``@`` to round to nearest unit (hour, day, month, etc.) - -**Examples of Time Modifier Values**: - -* ``earliest=now`` - From current time -* ``latest='2024-12-31 23:59:59'`` - Until a specific date -* ``earliest=-7d`` - From 7 days ago -* ``latest='+1d@d'`` - Until tomorrow at start of day -* ``earliest='-1month@month'`` - From start of previous month -* ``latest=1754020061`` - Until a unix timestamp (August 1, 2025 03:47:41 at UTC) - -Read more details on time modifiers `here `_. 
- -**Notes:** - -* **Column name conflicts**: If your data contains columns named "earliest" or "latest", use backticks to access them as regular fields (e.g., ```earliest`="value"``) to avoid conflicts with time modifier syntax. -* **Time snap syntax**: Time modifiers with chained time offsets must be wrapped in quotes (e.g., ``latest='+1d@month-10h'``) for proper query parsing. - -Default Field Configuration -=========================== -When you search without specifying a field, it searches the default field configured by the ``index.query.default_field`` index setting (defaults to ``*`` which searches all fields). - -You can check or modify the default field setting:: - - GET /accounts/_settings/index.query.default_field - - PUT /accounts/_settings - { - "index.query.default_field": "firstname,lastname,email" - } - -Field Types and Search Behavior -=============================== - -**Text Fields**: Full-text search, phrase search - -* ``search message="error occurred" source=logs`` - -* Limitations: Wildcards apply to terms after analysis, not entire field value. - -**Keyword Fields**: Exact matching, wildcard patterns - -* ``search status="ACTIVE" source=logs`` - -* Limitations: No text analysis, case-sensitive matching - -**Numeric Fields**: Range queries, exact matching, IN operator - -* ``search age>=18 AND balance<50000 source=accounts`` - -* Limitations: No wildcard or text search support - -**Date Fields**: Range queries, exact matching, IN operator - -* ``search timestamp>="2024-01-01" source=logs`` - -* Limitations: Must use index mapping date format, no wildcards - -**Boolean Fields**: true/false values only, exact matching, IN operator - -* ``search active=true source=users`` - -* Limitations: No wildcards or range queries - -**IP Fields**: Exact matching, CIDR notation - -* ``search client_ip="192.168.1.0/24" source=logs`` - -* Limitations: No wildcards for partial IP matching. For wildcard search use multi field with keyword: ``search ip_address.keyword='1*' source=logs`` or WHERE clause: ``source=logs | where cast(ip_address as string) like '1%'`` - -**Field Type Performance Tips**: - - * Each field type has specific search capabilities and limitations. Using the wrong field type during ingestion impacts performance and accuracy - * For wildcard searches on non-keyword fields: Add a keyword field copy for better performance. Example: If you need wildcards on a text field, create ``message.keyword`` alongside ``message`` - -Cross-Cluster Search -==================== -Cross-cluster search lets any node in a cluster execute search requests against other clusters. Refer to `Cross-Cluster Search `_ for configuration. 
- -Example 1: Text Search -====================== - -**Basic Text Search** (unquoted single term):: - - os> search ERROR source=otellogs | sort @timestamp | fields severityText, body | head 1; - fetched rows / total rows = 1/1 - +--------------+---------------------------------------------------------+ - | severityText | body | - |--------------+---------------------------------------------------------| - | ERROR | Payment failed: Insufficient funds for user@example.com | - +--------------+---------------------------------------------------------+ - -**Phrase Search** (requires quotes for multi-word exact match):: - - os> search "Payment failed" source=otellogs | fields body; - fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | body | - |---------------------------------------------------------| - | Payment failed: Insufficient funds for user@example.com | - +---------------------------------------------------------+ - -**Implicit AND with Multiple Terms** (unquoted literals are combined with AND):: - - os> search user email source=otellogs | sort @timestamp | fields body | head 1; - fetched rows / total rows = 1/1 - +--------------------------------------------------------------------------------------------------------------------+ - | body | - |--------------------------------------------------------------------------------------------------------------------| - | Executing SQL: SELECT * FROM users WHERE email LIKE '%@gmail.com' AND status != 'deleted' ORDER BY created_at DESC | - +--------------------------------------------------------------------------------------------------------------------+ - -Note: ``search user email`` is equivalent to ``search user AND email``. Multiple unquoted terms are automatically combined with AND. - -**Enclose in double quotes for terms which contain special characters**:: - - os> search "john.doe+newsletter@company.com" source=otellogs | fields body; - fetched rows / total rows = 1/1 - +--------------------------------------------------------------------------------------------------------------------+ - | body | - |--------------------------------------------------------------------------------------------------------------------| - | Email notification sent to john.doe+newsletter@company.com with subject: 'Welcome! 
Your order #12345 is confirmed' | - +--------------------------------------------------------------------------------------------------------------------+ - -**Mixed Phrase and Boolean**:: - - os> search "User authentication" OR OAuth2 source=otellogs | sort @timestamp | fields body | head 1; - fetched rows / total rows = 1/1 - +----------------------------------------------------------------------------------------------------------+ - | body | - |----------------------------------------------------------------------------------------------------------| - | [2024-01-15 10:30:09] production.INFO: User authentication successful for admin@company.org using OAuth2 | - +----------------------------------------------------------------------------------------------------------+ - -Example 2: Boolean Logic and Operator Precedence -================================================= - -**Boolean Operators**:: - - os> search severityText="ERROR" OR severityText="FATAL" source=otellogs | sort @timestamp | fields severityText | head 3; - fetched rows / total rows = 3/3 - +--------------+ - | severityText | - |--------------| - | ERROR | - | FATAL | - | ERROR | - +--------------+ - - os> search severityText="INFO" AND `resource.attributes.service.name`="cart-service" source=otellogs | fields body | head 1; - fetched rows / total rows = 1/1 - +----------------------------------------------------------------------------------+ - | body | - |----------------------------------------------------------------------------------| - | User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart | - +----------------------------------------------------------------------------------+ - -**Operator Precedence** (highest to lowest): Parentheses → NOT → OR → AND:: - - os> search severityText="ERROR" OR severityText="WARN" AND severityNumber>15 source=otellogs | sort @timestamp | fields severityText, severityNumber | head 2; - fetched rows / total rows = 2/2 - +--------------+----------------+ - | severityText | severityNumber | - |--------------+----------------| - | ERROR | 17 | - | ERROR | 17 | - +--------------+----------------+ - -The above evaluates as ``(severityText="ERROR" OR severityText="WARN") AND severityNumber>15`` - -Example 3: NOT vs != Semantics -============================== - -**!= operator** (field must exist and not equal the value):: - - os> search employer!="Quility" source=accounts; - fetched rows / total rows = 2/2 - +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ - -**NOT operator** (excludes matching conditions, includes null fields):: - - os> search NOT employer="Quility" source=accounts; - fetched rows / total rows = 3/3 - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ - | account_number | 
firstname | address | balance | gender | city | employer | state | age | email | lastname | - |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ - -**Key difference**: ``!=`` excludes null values, ``NOT`` includes them. - -Dale Adams (account 18) has ``employer=null``. He appears in ``NOT employer="Quility"`` but not in ``employer!="Quility"``. - -Example 4: Wildcards -==================== - -**Wildcard Patterns**:: - - os> search severityText=ERR* source=otellogs | sort @timestamp | fields severityText | head 3; - fetched rows / total rows = 3/3 - +--------------+ - | severityText | - |--------------| - | ERROR | - | ERROR | - | ERROR2 | - +--------------+ - - os> search body=user* source=otellogs | sort @timestamp | fields body | head 2; - fetched rows / total rows = 2/2 - +----------------------------------------------------------------------------------+ - | body | - |----------------------------------------------------------------------------------| - | User e1ce63e6-8501-11f0-930d-c2fcbdc05f14 adding 4 of product HQTGWGPNH4 to cart | - | Payment failed: Insufficient funds for user@example.com | - +----------------------------------------------------------------------------------+ - -**Wildcard Rules**: - -* ``*`` - Matches zero or more characters -* ``?`` - Matches exactly one character - -**Single character wildcard (?)**:: - - os> search severityText="INFO?" source=otellogs | sort @timestamp | fields severityText | head 3; - fetched rows / total rows = 3/3 - +--------------+ - | severityText | - |--------------| - | INFO2 | - | INFO3 | - | INFO4 | - +--------------+ - - -Example 5: Range Queries -======================== - -Use comparison operators (>, <, >=, <=) to filter numeric and date fields within specific ranges. Range queries are particularly useful for filtering by age, price, timestamps, or any numeric metrics. - -:: - - os> search severityNumber>15 AND severityNumber<=20 source=otellogs | sort @timestamp | fields severityNumber | head 3; - fetched rows / total rows = 3/3 - +----------------+ - | severityNumber | - |----------------| - | 17 | - | 17 | - | 18 | - +----------------+ - - os> search `attributes.payment.amount`>=1000.0 AND `attributes.payment.amount`<=2000.0 source=otellogs | fields body; - fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | body | - |---------------------------------------------------------| - | Payment failed: Insufficient funds for user@example.com | - +---------------------------------------------------------+ - -Example 6: Field Search with Wildcards -====================================== - -When searching in text or keyword fields, wildcards enable partial matching. This is particularly useful for finding records where you only know part of the value. Note that wildcards work best with keyword fields, while text fields may produce unexpected results due to tokenization. 
- -**Partial Search in Keyword Fields**:: - - os> search employer=Py* source=accounts | fields firstname, employer; - fetched rows / total rows = 1/1 - +-----------+----------+ - | firstname | employer | - |-----------+----------| - | Amber | Pyrami | - +-----------+----------+ - -**Combining Wildcards with Field Comparisons**:: - - os> search firstname=A* AND age>30 source=accounts | fields firstname, age, city; - fetched rows / total rows = 1/1 - +-----------+-----+--------+ - | firstname | age | city | - |-----------+-----+--------| - | Amber | 32 | Brogan | - +-----------+-----+--------+ - -**Important Notes on Wildcard Usage**: - -* **Keyword fields**: Best for wildcard searches - exact value matching with pattern support -* **Text fields**: Wildcards apply to individual tokens after analysis, not the entire field value -* **Performance**: Leading wildcards (e.g., ``*@example.com``) are slower than trailing wildcards -* **Case sensitivity**: Keyword field wildcards are case-sensitive unless normalized during indexing - -Example 7: IN Operator and Field Comparisons -============================================ - -The IN operator efficiently checks if a field matches any value from a list. This is cleaner and more performant than chaining multiple OR conditions for the same field. - -**IN Operator**:: - - os> search severityText IN ("ERROR", "WARN", "FATAL") source=otellogs | sort @timestamp | fields severityText | head 3; - fetched rows / total rows = 3/3 - +--------------+ - | severityText | - |--------------| - | ERROR | - | WARN | - | FATAL | - +--------------+ - -**Field Comparison Examples**:: - - os> search severityNumber=17 source=otellogs | sort @timestamp | fields body | head 1; - fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | body | - |---------------------------------------------------------| - | Payment failed: Insufficient funds for user@example.com | - +---------------------------------------------------------+ - - os> search `attributes.user.email`="user@example.com" source=otellogs | fields body; - fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | body | - |---------------------------------------------------------| - | Payment failed: Insufficient funds for user@example.com | - +---------------------------------------------------------+ - -Example 8: Complex Expressions -============================== - -Combine multiple conditions using boolean operators and parentheses to create sophisticated search queries. - -:: - - os> search (severityText="ERROR" OR severityText="WARN") AND severityNumber>10 source=otellogs | sort @timestamp | fields severityText | head 3; - fetched rows / total rows = 3/3 - +--------------+ - | severityText | - |--------------| - | ERROR | - | WARN | - | ERROR | - +--------------+ - - os> search `attributes.user.email`="user@example.com" OR (`attributes.error.code`="INSUFFICIENT_FUNDS" AND severityNumber>15) source=otellogs | fields body; - fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | body | - |---------------------------------------------------------| - | Payment failed: Insufficient funds for user@example.com | - +---------------------------------------------------------+ - -Example 9: Time Modifiers -========================= - -Time modifiers filter search results by time range using the implicit ``@timestamp`` field. They support various time formats for precise temporal filtering. 
- -**Absolute Time Filtering**:: - - os> search earliest='2024-01-15 10:30:05' latest='2024-01-15 10:30:10' source=otellogs | fields @timestamp, severityText; - fetched rows / total rows = 6/6 - +-------------------------------+--------------+ - | @timestamp | severityText | - |-------------------------------+--------------| - | 2024-01-15 10:30:05.678901234 | FATAL | - | 2024-01-15 10:30:06.789012345 | TRACE | - | 2024-01-15 10:30:07.890123456 | ERROR | - | 2024-01-15 10:30:08.901234567 | WARN | - | 2024-01-15 10:30:09.012345678 | INFO | - | 2024-01-15 10:30:10.123456789 | TRACE2 | - +-------------------------------+--------------+ - -**Relative Time Filtering** (before 30 seconds ago):: - - os> search latest=-30s source=otellogs | sort @timestamp | fields @timestamp, severityText | head 3; - fetched rows / total rows = 3/3 - +-------------------------------+--------------+ - | @timestamp | severityText | - |-------------------------------+--------------| - | 2024-01-15 10:30:00.123456789 | INFO | - | 2024-01-15 10:30:01.23456789 | ERROR | - | 2024-01-15 10:30:02.345678901 | WARN | - +-------------------------------+--------------+ - -**Time Snapping** (before start of current minute):: - - os> search latest='@m' source=otellogs | fields @timestamp, severityText | head 2; - fetched rows / total rows = 2/2 - +-------------------------------+--------------+ - | @timestamp | severityText | - |-------------------------------+--------------| - | 2024-01-15 10:30:00.123456789 | INFO | - | 2024-01-15 10:30:01.23456789 | ERROR | - +-------------------------------+--------------+ - -**Unix Timestamp Filtering**:: - - os> search earliest=1705314600 latest=1705314605 source=otellogs | fields @timestamp, severityText; - fetched rows / total rows = 5/5 - +-------------------------------+--------------+ - | @timestamp | severityText | - |-------------------------------+--------------| - | 2024-01-15 10:30:00.123456789 | INFO | - | 2024-01-15 10:30:01.23456789 | ERROR | - | 2024-01-15 10:30:02.345678901 | WARN | - | 2024-01-15 10:30:03.456789012 | DEBUG | - | 2024-01-15 10:30:04.567890123 | INFO | - +-------------------------------+--------------+ - -Example 10: Special Characters and Escaping -=========================================== - -Understand when and how to escape special characters in your search queries. There are two categories of characters that need escaping: - -**Characters that must be escaped**: -* **Backslashes (\)**: Always escape as ``\\`` to search for literal backslash -* **Quotes (")**: Escape as ``\"`` when inside quoted strings - -**Wildcard characters (escape only to search literally)**: -* **Asterisk (*)**: Use as-is for wildcard, escape as ``\\*`` to search for literal asterisk -* **Question mark (?)**: Use as-is for wildcard, escape as ``\\?`` to search for literal question mark - -.. list-table:: Wildcard vs Literal Search - :widths: 25 35 40 - :header-rows: 1 - - * - Intent - - PPL Syntax - - Result - * - Wildcard search - - ``field=user*`` - - Matches "user", "user123", "userABC" - * - Literal "user*" - - ``field="user\\*"`` - - Matches only "user*" - * - Wildcard search - - ``field=log?`` - - Matches "log1", "logA", "logs" - * - Literal "log?" - - ``field="log\\?"`` - - Matches only "log?" 
- -**Backslash in file paths**:: - - os> search `attributes.error.type`="C:\\\\Users\\\\admin" source=otellogs | fields `attributes.error.type`; - fetched rows / total rows = 1/1 - +-----------------------+ - | attributes.error.type | - |-----------------------| - | C:\Users\admin | - +-----------------------+ - -Note: Each backslash in the search value needs to be escaped with another backslash. When using REST API with JSON, additional JSON escaping is required. - -**Quotes within strings**:: - - os> search body="\"exact phrase\"" source=otellogs | sort @timestamp | fields body | head 1; - fetched rows / total rows = 1/1 - +--------------------------------------------------------------------------------------------------------------------------------------------------------+ - | body | - |--------------------------------------------------------------------------------------------------------------------------------------------------------| - | Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT "exact phrase" wildcard* fuzzy~2 /regex/ [range TO search] | - +--------------------------------------------------------------------------------------------------------------------------------------------------------+ - -**Text with special characters**:: - - os> search "wildcard\\* fuzzy~2" source=otellogs | fields body | head 1; - fetched rows / total rows = 1/1 - +--------------------------------------------------------------------------------------------------------------------------------------------------------+ - | body | - |--------------------------------------------------------------------------------------------------------------------------------------------------------| - | Query contains Lucene special characters: +field:value -excluded AND (grouped OR terms) NOT "exact phrase" wildcard* fuzzy~2 /regex/ [range TO search] | - +--------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Example 11: Fetch All Data -========================== - -Retrieve all documents from an index by specifying only the source without any search conditions. This is useful for exploring small datasets or verifying data ingestion. 
- -:: - - os> source=accounts; - fetched rows / total rows = 4/4 - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ \ No newline at end of file diff --git a/docs/user/ppl/cmd/showdatasources.md b/docs/user/ppl/cmd/showdatasources.md new file mode 100644 index 00000000000..10129873aa6 --- /dev/null +++ b/docs/user/ppl/cmd/showdatasources.md @@ -0,0 +1,32 @@ +# show datasources + +## Description + +Use the `show datasources` command to query datasources configured in the PPL engine. The `show datasources` command can only be used as the first command in the PPL query. +## Syntax + +show datasources +## Example 1: Fetch all PROMETHEUS datasources + +This example shows fetching all the datasources of type prometheus. +PPL query for all PROMETHEUS DATASOURCES + +```ppl +show datasources +| where CONNECTOR_TYPE='PROMETHEUS' +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------------+----------------+ +| DATASOURCE_NAME | CONNECTOR_TYPE | +|-----------------+----------------| +| my_prometheus | PROMETHEUS | ++-----------------+----------------+ +``` + +## Limitations + +The `show datasources` command can only work with `plugins.calcite.enabled=false`. \ No newline at end of file diff --git a/docs/user/ppl/cmd/showdatasources.rst b/docs/user/ppl/cmd/showdatasources.rst deleted file mode 100644 index 9d0794bb3aa..00000000000 --- a/docs/user/ppl/cmd/showdatasources.rst +++ /dev/null @@ -1,38 +0,0 @@ -================ -show datasources -================ - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| Use the ``show datasources`` command to query datasources configured in the PPL engine. The ``show datasources`` command can only be used as the first command in the PPL query. - -Syntax -====== -show datasources - -Example 1: Fetch all PROMETHEUS datasources -=========================================== - -This example shows fetching all the datasources of type prometheus. - -PPL query for all PROMETHEUS DATASOURCES:: - - os> show datasources | where CONNECTOR_TYPE='PROMETHEUS'; - fetched rows / total rows = 1/1 - +-----------------+----------------+ - | DATASOURCE_NAME | CONNECTOR_TYPE | - |-----------------+----------------| - | my_prometheus | PROMETHEUS | - +-----------------+----------------+ - - -Limitations -=========== -The ``show datasources`` command can only work with ``plugins.calcite.enabled=false``. 
diff --git a/docs/user/ppl/cmd/sort.md b/docs/user/ppl/cmd/sort.md new file mode 100644 index 00000000000..a6e5ba1c0ea --- /dev/null +++ b/docs/user/ppl/cmd/sort.md @@ -0,0 +1,256 @@
+# sort
+
+## Description
+
+The `sort` command sorts all the search results by the specified fields.
+
+## Syntax
+
+sort [count] <[+\|-] sort-field \| sort-field [asc\|a\|desc\|d]>...
+
+* count: optional. The number of results to return. Specifying a count of 0 or less returns all results. **Default:** 0.
+* [+\|-]: optional. A plus [+] stands for ascending order with NULL/MISSING first, and a minus [-] stands for descending order with NULL/MISSING last. **Default:** ascending order and NULL/MISSING first.
+* [asc\|a\|desc\|d]: optional. asc/a stands for ascending order with NULL/MISSING first; desc/d stands for descending order with NULL/MISSING last. **Default:** ascending order and NULL/MISSING first.
+* sort-field: mandatory. The field used to sort. Can use `auto(field)`, `str(field)`, `ip(field)`, or `num(field)` to specify how to interpret field values.
+
+> **Note:**
+> You cannot mix +/- and asc/desc in the same sort command. Choose one approach for all fields in a single sort command.
+
+## Example 1: Sort by one field
+
+This example shows sorting all documents by age field in ascending order.
+
+```ppl
+source=accounts
+| sort age
+| fields account_number, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----+
+| account_number | age |
+|----------------+-----|
+| 13 | 28 |
+| 1 | 32 |
+| 18 | 33 |
+| 6 | 36 |
++----------------+-----+
+```
+
+## Example 2: Sort by one field and return all results
+
+This example shows sorting all documents by age field in ascending order and returning all results.
+
+```ppl
+source=accounts
+| sort 0 age
+| fields account_number, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----+
+| account_number | age |
+|----------------+-----|
+| 13 | 28 |
+| 1 | 32 |
+| 18 | 33 |
+| 6 | 36 |
++----------------+-----+
+```
+
+## Example 3: Sort by one field in descending order (using -)
+
+This example shows sorting all documents by age field in descending order.
+
+```ppl
+source=accounts
+| sort - age
+| fields account_number, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----+
+| account_number | age |
+|----------------+-----|
+| 6 | 36 |
+| 18 | 33 |
+| 1 | 32 |
+| 13 | 28 |
++----------------+-----+
+```
+
+## Example 4: Sort by one field in descending order (using desc)
+
+This example shows sorting all the documents by the age field in descending order using the desc keyword.
+
+```ppl
+source=accounts
+| sort age desc
+| fields account_number, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----+
+| account_number | age |
+|----------------+-----|
+| 6 | 36 |
+| 18 | 33 |
+| 1 | 32 |
+| 13 | 28 |
++----------------+-----+
+```
+
+## Example 5: Sort by multiple fields (using +/-)
+
+This example shows sorting all documents by gender field in ascending order and age field in descending order using +/- operators. 
+
+```ppl
+source=accounts
+| sort + gender, - age
+| fields account_number, gender, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+--------+-----+
+| account_number | gender | age |
+|----------------+--------+-----|
+| 13 | F | 28 |
+| 6 | M | 36 |
+| 18 | M | 33 |
+| 1 | M | 32 |
++----------------+--------+-----+
+```
+
+## Example 6: Sort by multiple fields (using asc/desc)
+
+This example shows sorting all the documents by the gender field in ascending order and the age field in descending order using asc/desc keywords.
+
+```ppl
+source=accounts
+| sort gender asc, age desc
+| fields account_number, gender, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+--------+-----+
+| account_number | gender | age |
+|----------------+--------+-----|
+| 13 | F | 28 |
+| 6 | M | 36 |
+| 18 | M | 33 |
+| 1 | M | 32 |
++----------------+--------+-----+
```
+
+## Example 7: Sort by field including null values
+
+This example shows sorting the employer field with the default option (ascending order and null first). The result shows that the null value is in the first row.
+
+```ppl
+source=accounts
+| sort employer
+| fields employer
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------+
+| employer |
+|----------|
+| null |
+| Netagy |
+| Pyrami |
+| Quility |
++----------+
+```
+
+## Example 8: Specify the number of sorted documents to return
+
+This example shows sorting all documents and returning 2 documents.
+
+```ppl
+source=accounts
+| sort 2 age
+| fields account_number, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----------------+-----+
+| account_number | age |
+|----------------+-----|
+| 13 | 28 |
+| 1 | 32 |
++----------------+-----+
+```
+
+## Example 9: Sort with desc modifier
+
+This example shows sorting with the desc modifier to reverse sort order.
+
+```ppl
+source=accounts
+| sort age desc
+| fields account_number, age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----+
+| account_number | age |
+|----------------+-----|
+| 6 | 36 |
+| 18 | 33 |
+| 1 | 32 |
+| 13 | 28 |
++----------------+-----+
+```
+
+## Example 10: Sort by specifying field type
+
+This example shows sorting with str() to sort numeric values lexicographically.
+
+```ppl
+source=accounts
+| sort str(account_number)
+| fields account_number
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+
+| account_number |
+|----------------|
+| 1 |
+| 13 |
+| 18 |
+| 6 |
++----------------+
+```
+ \ No newline at end of file diff --git a/docs/user/ppl/cmd/sort.rst b/docs/user/ppl/cmd/sort.rst deleted file mode 100644 index 929a2b313b4..00000000000 --- a/docs/user/ppl/cmd/sort.rst +++ /dev/null @@ -1,208 +0,0 @@ -==== -sort -==== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``sort`` command sorts all the search results by the specified fields. - -Syntax -============ -sort [count] <[+|-] sort-field | sort-field [asc|a|desc|d]>... - - -* count: optional. The number of results to return. Specifying a count of 0 or less than 0 returns all results. **Default:** 0. -* [+|-]: optional. The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. -* [asc|a|desc|d]: optional. asc/a stands for ascending order and NULL/MISSING first. 
desc/d stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. -* sort-field: mandatory. The field used to sort. Can use ``auto(field)``, ``str(field)``, ``ip(field)``, or ``num(field)`` to specify how to interpret field values. - -.. note:: - You cannot mix +/- and asc/desc in the same sort command. Choose one approach for all fields in a single sort command. - - -Example 1: Sort by one field -============================ - -This example shows sorting all documents by age field in ascending order. - -PPL query:: - - os> source=accounts | sort age | fields account_number, age; - fetched rows / total rows = 4/4 - +----------------+-----+ - | account_number | age | - |----------------+-----| - | 13 | 28 | - | 1 | 32 | - | 18 | 33 | - | 6 | 36 | - +----------------+-----+ - - -Example 2: Sort by one field return all the result -================================================== - -This example shows sorting all documents by age field in ascending order and returning all results. - -PPL query:: - - os> source=accounts | sort 0 age | fields account_number, age; - fetched rows / total rows = 4/4 - +----------------+-----+ - | account_number | age | - |----------------+-----| - | 13 | 28 | - | 1 | 32 | - | 18 | 33 | - | 6 | 36 | - +----------------+-----+ - - -Example 3: Sort by one field in descending order (using -) -========================================================== - -This example shows sorting all documents by age field in descending order. - -PPL query:: - - os> source=accounts | sort - age | fields account_number, age; - fetched rows / total rows = 4/4 - +----------------+-----+ - | account_number | age | - |----------------+-----| - | 6 | 36 | - | 18 | 33 | - | 1 | 32 | - | 13 | 28 | - +----------------+-----+ - -Example 4: Sort by one field in descending order (using desc) -============================================================== - -This example shows sorting all the document by the age field in descending order using the desc keyword. - -PPL query:: - - os> source=accounts | sort age desc | fields account_number, age; - fetched rows / total rows = 4/4 - +----------------+-----+ - | account_number | age | - |----------------+-----| - | 6 | 36 | - | 18 | 33 | - | 1 | 32 | - | 13 | 28 | - +----------------+-----+ - -Example 5: Sort by multiple fields (using +/-) -============================================== - -This example shows sorting all documents by gender field in ascending order and age field in descending order using +/- operators. - -PPL query:: - - os> source=accounts | sort + gender, - age | fields account_number, gender, age; - fetched rows / total rows = 4/4 - +----------------+--------+-----+ - | account_number | gender | age | - |----------------+--------+-----| - | 13 | F | 28 | - | 6 | M | 36 | - | 18 | M | 33 | - | 1 | M | 32 | - +----------------+--------+-----+ - -Example 6: Sort by multiple fields (using asc/desc) -==================================================== - -This example shows sorting all the document by the gender field in ascending order and age field in descending order using asc/desc keywords. 
-
-PPL query::
-
-    os> source=accounts | sort gender asc, age desc | fields account_number, gender, age;
-    fetched rows / total rows = 4/4
-    +----------------+--------+-----+
-    | account_number | gender | age |
-    |----------------+--------+-----|
-    | 13             | F      | 28  |
-    | 6              | M      | 36  |
-    | 18             | M      | 33  |
-    | 1              | M      | 32  |
-    +----------------+--------+-----+
-
-Example 7: Sort by field include null value
-===========================================
-
-This example shows sorting employer field by default option (ascending order and null first). The result shows that null value is in the first row.
-
-PPL query::
-
-    os> source=accounts | sort employer | fields employer;
-    fetched rows / total rows = 4/4
-    +----------+
-    | employer |
-    |----------|
-    | null     |
-    | Netagy   |
-    | Pyrami   |
-    | Quility  |
-    +----------+
-
-Example 8: Specify the number of sorted documents to return
-============================================================
-
-This example shows sorting all documents and returning 2 documents.
-
-PPL query::
-
-    os> source=accounts | sort 2 age | fields account_number, age;
-    fetched rows / total rows = 2/2
-    +----------------+-----+
-    | account_number | age |
-    |----------------+-----|
-    | 13             | 28  |
-    | 1              | 32  |
-    +----------------+-----+
-
-Example 9: Sort with desc modifier
-===================================
-
-This example shows sorting with the desc modifier to reverse sort order.
-
-PPL query::
-
-    os> source=accounts | sort age desc | fields account_number, age;
-    fetched rows / total rows = 4/4
-    +----------------+-----+
-    | account_number | age |
-    |----------------+-----|
-    | 6              | 36  |
-    | 18             | 33  |
-    | 1              | 32  |
-    | 13             | 28  |
-    +----------------+-----+
-
-Example 10: Sort with specifying field type
-==================================
-
-This example shows sorting with str() to sort numeric values lexicographically.
-
-PPL query::
-
-    os> source=accounts | sort str(account_number) | fields account_number;
-    fetched rows / total rows = 4/4
-    +----------------+
-    | account_number |
-    |----------------|
-    | 1              |
-    | 13             |
-    | 18             |
-    | 6              |
-    +----------------+
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md
new file mode 100644
index 00000000000..c83afc3a31c
--- /dev/null
+++ b/docs/user/ppl/cmd/spath.md
@@ -0,0 +1,110 @@
+# spath
+
+## Description
+
+The `spath` command allows extracting fields from structured text data. It currently supports selecting from JSON data with JSON paths.
+## Syntax
+
+spath input=\<field\> [output=\<field\>] [path=]\<path\>
+* input: mandatory. The field to scan for JSON data.
+* output: optional. The destination field that the data will be loaded to. **Default:** value of `path`.
+* path: mandatory. The path of the data to load for the object. For more information on path syntax, see [json_extract](../functions/json.md#json_extract).
+
+## Note
+
+The `spath` command currently does not support pushdown behavior for extraction. It will be slow on large datasets. It's generally better to index fields needed for filtering directly instead of using `spath` to filter nested fields.
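+
+As a hedged illustration of the pattern this note warns about, the following sketch (using the `structured` index from the examples below) extracts a field and then filters on it, which forces every document to be scanned and parsed. Since `spath` returns strings for inner types (see Example 3), the comparison uses a string literal:
+
+```ppl ignore
+source=structured
+| spath input=doc_n n
+| where n = "1"
+| fields doc_n n
+```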
+
+## Example 1: Simple Field Extraction
+
+The simplest spath is to extract a single field. This example extracts `n` from the `doc` field of type `text`.
+
+```ppl
+source=structured
+| spath input=doc_n n
+| fields doc_n n
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++----------+---+
+| doc_n    | n |
+|----------+---|
+| {"n": 1} | 1 |
+| {"n": 2} | 2 |
+| {"n": 3} | 3 |
++----------+---+
+```
+
+## Example 2: Lists & Nesting
+
+This example demonstrates more JSON path uses, like traversing nested fields and extracting list elements.
+
+```ppl
+source=structured
+| spath input=doc_list output=first_element list{0}
+| spath input=doc_list output=all_elements list{}
+| spath input=doc_list output=nested nest_out.nest_in
+| fields doc_list first_element all_elements nested
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++------------------------------------------------------+---------------+--------------+--------+
+| doc_list                                             | first_element | all_elements | nested |
+|------------------------------------------------------+---------------+--------------+--------|
+| {"list": [1, 2, 3, 4], "nest_out": {"nest_in": "a"}} | 1             | [1,2,3,4]    | a      |
+| {"list": [], "nest_out": {"nest_in": "a"}}           | null          | []           | a      |
+| {"list": [5, 6], "nest_out": {"nest_in": "a"}}       | 5             | [5,6]        | a      |
++------------------------------------------------------+---------------+--------------+--------+
+```
+
+## Example 3: Sum of inner elements
+
+This example shows extracting an inner field and doing statistics on it, using the docs from example 1. It also demonstrates that `spath` always returns strings for inner types.
+
+```ppl
+source=structured
+| spath input=doc_n n
+| eval n=cast(n as int)
+| stats sum(n)
+| fields `sum(n)`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| sum(n) |
+|--------|
+| 6      |
++--------+
+```
+
+## Example 4: Escaped paths
+
+`spath` can quote paths as strings, so it accepts any path that `json_extract` does. This includes escaping complex field names as array components.
+
+```ppl
+source=structured
+| spath output=a input=doc_escape "['a fancy field name']"
+| spath output=b input=doc_escape "['a.b.c']"
+| fields a b
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++-------+---+
+| a     | b |
+|-------+---|
+| true  | 0 |
+| true  | 1 |
+| false | 2 |
++-------+---+
+```
+ 
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/spath.rst b/docs/user/ppl/cmd/spath.rst
deleted file mode 100644
index f7a9d034132..00000000000
--- a/docs/user/ppl/cmd/spath.rst
+++ /dev/null
@@ -1,92 +0,0 @@
-=====
-spath
-=====
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-===========
-| The `spath` command allows extracting fields from structured text data. It currently allows selecting from JSON data with JSON paths.
-
-Syntax
-======
-spath input= [output=] [path=]
-
-* input: mandatory. The field to scan for JSON data.
-* output: optional. The destination field that the data will be loaded to. **Default:** value of `path`.
-* path: mandatory. The path of the data to load for the object. For more information on path syntax, see `json_extract <../functions/json.rst#json_extract>`_.
-
-Note
-=====
-The `spath` command currently does not support pushdown behavior for extraction. It will be slow on large datasets. It's generally better to index fields needed for filtering directly instead of using `spath` to filter nested fields.
-
-Example 1: Simple Field Extraction
-==================================
-
-The simplest spath is to extract a single field. This example extracts `n` from the `doc` field of type `text`.
-
-PPL query::
-
-    os> source=structured | spath input=doc_n n | fields doc_n n;
-    fetched rows / total rows = 3/3
-    +----------+---+
-    | doc_n    | n |
-    |----------+---|
-    | {"n": 1} | 1 |
-    | {"n": 2} | 2 |
-    | {"n": 3} | 3 |
-    +----------+---+
-
-Example 2: Lists & Nesting
-==========================
-
-This example demonstrates more JSON path uses, like traversing nested fields and extracting list elements.
-
-PPL query::
-
-    os> source=structured | spath input=doc_list output=first_element list{0} | spath input=doc_list output=all_elements list{} | spath input=doc_list output=nested nest_out.nest_in | fields doc_list first_element all_elements nested;
-    fetched rows / total rows = 3/3
-    +------------------------------------------------------+---------------+--------------+--------+
-    | doc_list                                             | first_element | all_elements | nested |
-    |------------------------------------------------------+---------------+--------------+--------|
-    | {"list": [1, 2, 3, 4], "nest_out": {"nest_in": "a"}} | 1             | [1,2,3,4]    | a      |
-    | {"list": [], "nest_out": {"nest_in": "a"}}           | null          | []           | a      |
-    | {"list": [5, 6], "nest_out": {"nest_in": "a"}}       | 5             | [5,6]        | a      |
-    +------------------------------------------------------+---------------+--------------+--------+
-
-Example 3: Sum of inner elements
-================================
-
-This example shows extracting an inner field and doing statistics on it, using the docs from example 1. It also demonstrates that `spath` always returns strings for inner types.
-
-PPL query::
-
-    os> source=structured | spath input=doc_n n | eval n=cast(n as int) | stats sum(n) | fields `sum(n)`;
-    fetched rows / total rows = 1/1
-    +--------+
-    | sum(n) |
-    |--------|
-    | 6      |
-    +--------+
-
-Example 4: Escaped paths
-============================
-
-`spath` can escape paths with strings to accept any path that `json_extract` does. This includes escaping complex field names as array components.
-
-PPL query::
-
-    os> source=structured | spath output=a input=doc_escape "['a fancy field name']" | spath output=b input=doc_escape "['a.b.c']" | fields a b;
-    fetched rows / total rows = 3/3
-    +-------+---+
-    | a     | b |
-    |-------+---|
-    | true  | 0 |
-    | true  | 1 |
-    | false | 2 |
-    +-------+---+
diff --git a/docs/user/ppl/cmd/stats.md b/docs/user/ppl/cmd/stats.md
new file mode 100644
index 00000000000..5d805b6b723
--- /dev/null
+++ b/docs/user/ppl/cmd/stats.md
@@ -0,0 +1,487 @@
+# stats
+
+## Description
+
+The `stats` command calculates the aggregation from the search result.
+## Syntax
+
+stats [bucket_nullable=bool] \<aggregation\>... [by-clause]
+* aggregation: mandatory. An aggregation function.
+* bucket_nullable: optional. Controls whether the stats command includes null buckets in group-by aggregations. When set to `false`, the aggregation ignores records where the group-by field is null, resulting in faster performance by excluding the null bucket. **Default:** Determined by `plugins.ppl.syntax.legacy.preferred`.
+    * When `plugins.ppl.syntax.legacy.preferred=true`, `bucket_nullable` defaults to `true`
+    * When `plugins.ppl.syntax.legacy.preferred=false`, `bucket_nullable` defaults to `false`
+* by-clause: optional. Groups results by specified fields or expressions. Syntax: by [span-expression,] [field,]... **Default:** If no by-clause is specified, the stats command returns only one row, which is the aggregation over the entire result set.
+* span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). The unit of the interval expression is the natural unit by default. If the field is a date/time type field, the aggregation results always ignore the null bucket. For example, `span(age, 10)` creates 10-year age buckets, `span(timestamp, 1h)` creates hourly buckets.
+    * Available time units
+        * millisecond (ms)
+        * second (s)
+        * minute (m, case sensitive)
+        * hour (h)
+        * day (d)
+        * week (w)
+        * month (M, case sensitive)
+        * quarter (q)
+        * year (y)
+
+## Aggregation Functions
+
+The stats command supports the following aggregation functions:
+* COUNT/C: Count of values
+* SUM: Sum of numeric values
+* AVG: Average of numeric values
+* MAX: Maximum value
+* MIN: Minimum value
+* VAR_SAMP: Sample variance
+* VAR_POP: Population variance
+* STDDEV_SAMP: Sample standard deviation
+* STDDEV_POP: Population standard deviation
+* DISTINCT_COUNT_APPROX: Approximate distinct count
+* TAKE: List of original values
+* PERCENTILE/PERCENTILE_APPROX: Percentile calculations
+* PERC\<percent\>/P\<percent\>: Percentile shortcut functions
+* MEDIAN: 50th percentile
+* EARLIEST: Earliest value by timestamp
+* LATEST: Latest value by timestamp
+* FIRST: First non-null value
+* LAST: Last non-null value
+* LIST: Collect all values into array
+* VALUES: Collect unique values into sorted array
+
+For detailed documentation of each function, see [Aggregation Functions](../functions/aggregations.md).
+## Example 1: Calculate the count of events
+
+This example shows calculating the count of events in the accounts.
+
+```ppl
+source=accounts
+| stats count()
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------+
+| count() |
+|---------|
+| 4       |
++---------+
+```
+
+## Example 2: Calculate the average of a field
+
+This example shows calculating the average age of all the accounts.
+
+```ppl
+source=accounts
+| stats avg(age)
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------+
+| avg(age) |
+|----------|
+| 32.25    |
++----------+
+```
+
+## Example 3: Calculate the average of a field by group
+
+This example shows calculating the average age of all the accounts, grouped by gender.
+
+```ppl
+source=accounts
+| stats avg(age) by gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++--------------------+--------+
+| avg(age)           | gender |
+|--------------------+--------|
+| 28.0               | F      |
+| 33.666666666666664 | M      |
++--------------------+--------+
+```
+
+## Example 4: Calculate the average, sum and count of a field by group
+
+This example shows calculating the average age, the sum of ages, and the count of events of all the accounts, grouped by gender.
+
+```ppl
+source=accounts
+| stats avg(age), sum(age), count() by gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++--------------------+----------+---------+--------+
+| avg(age)           | sum(age) | count() | gender |
+|--------------------+----------+---------+--------|
+| 28.0               | 28       | 1       | F      |
+| 33.666666666666664 | 101      | 3       | M      |
++--------------------+----------+---------+--------+
+```
+
+## Example 5: Calculate the maximum of a field
+
+The example calculates the max age of all the accounts.
+
+```ppl
+source=accounts
+| stats max(age)
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------+
+| max(age) |
+|----------|
+| 36       |
++----------+
+```
+
+## Example 6: Calculate the maximum and minimum of a field by group
+
+The example calculates the max and min age values of all the accounts, grouped by gender.
+
+```ppl
+source=accounts
+| stats max(age), min(age) by gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----------+----------+--------+
+| max(age) | min(age) | gender |
+|----------+----------+--------|
+| 28       | 28       | F      |
+| 36       | 32       | M      |
++----------+----------+--------+
+```
+
+## Example 7: Calculate the distinct count of a field
+
+To get the count of distinct values of a field, you can use the `DISTINCT_COUNT` (or `DC`) function instead of `COUNT`. The example calculates both the count and the distinct count of the gender field of all the accounts.
+
+```ppl
+source=accounts
+| stats count(gender), distinct_count(gender)
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------+------------------------+
+| count(gender) | distinct_count(gender) |
+|---------------+------------------------|
+| 4             | 2                      |
++---------------+------------------------+
+```
+
+## Example 8: Calculate the count by a span
+
+The example gets the count of age by the interval of 10 years.
+
+```ppl
+source=accounts
+| stats count(age) by span(age, 10) as age_span
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++------------+----------+
+| count(age) | age_span |
+|------------+----------|
+| 1          | 20       |
+| 3          | 30       |
++------------+----------+
+```
+
+## Example 9: Calculate the count by a gender and span
+
+The example gets the count of age by the interval of 5 years, grouped by gender.
+
+```ppl
+source=accounts
+| stats count() as cnt by span(age, 5) as age_span, gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++-----+----------+--------+
+| cnt | age_span | gender |
+|-----+----------+--------|
+| 1   | 25       | F      |
+| 2   | 30       | M      |
+| 1   | 35       | M      |
++-----+----------+--------+
+```
+
+The span is always the first grouping key, regardless of the order in which you specify the grouping fields.
+
+```ppl
+source=accounts
+| stats count() as cnt by gender, span(age, 5) as age_span
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++-----+----------+--------+
+| cnt | age_span | gender |
+|-----+----------+--------|
+| 1   | 25       | F      |
+| 2   | 30       | M      |
+| 1   | 35       | M      |
++-----+----------+--------+
+```
+
+## Example 10: Calculate the count and get email list by a gender and span
+
+The example gets the count of age by the interval of 5 years, grouped by gender; additionally, each row gets a list of at most 5 emails.
+
+```ppl
+source=accounts
+| stats count() as cnt, take(email, 5) by span(age, 5) as age_span, gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++-----+--------------------------------------------+----------+--------+
+| cnt | take(email, 5)                             | age_span | gender |
+|-----+--------------------------------------------+----------+--------|
+| 1   | []                                         | 25       | F      |
+| 2   | [amberduke@pyrami.com,daleadams@boink.com] | 30       | M      |
+| 1   | [hattiebond@netagy.com]                    | 35       | M      |
++-----+--------------------------------------------+----------+--------+
+```
+
+## Example 11: Calculate the percentile of a field
+
+This example shows calculating the 90th percentile of age of all the accounts.
+
+```ppl
+source=accounts
+| stats percentile(age, 90)
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------+
+| percentile(age, 90) |
+|---------------------|
+| 36                  |
++---------------------+
+```
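+
+The shortcut forms from the aggregation-function list above can express the same calculation more compactly. A hedged sketch, assuming the `PERC<percent>` and `MEDIAN` shortcuts behave as listed (equivalent to `percentile(age, 90)` and `percentile(age, 50)`):
+
+```ppl ignore
+source=accounts
+| stats perc90(age), median(age)
+```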
+
+## Example 12: Calculate the percentile of a field by group
+
+This example shows calculating the 90th percentile of age of all the accounts, grouped by gender.
+
+```ppl
+source=accounts
+| stats percentile(age, 90) by gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++---------------------+--------+
+| percentile(age, 90) | gender |
+|---------------------+--------|
+| 28                  | F      |
+| 36                  | M      |
++---------------------+--------+
+```
+
+## Example 13: Calculate the percentile by a gender and span
+
+The example gets the 90th percentile of age by the interval of 10 years, grouped by gender.
+
+```ppl
+source=accounts
+| stats percentile(age, 90) as p90 by span(age, 10) as age_span, gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++-----+----------+--------+
+| p90 | age_span | gender |
+|-----+----------+--------|
+| 28  | 20       | F      |
+| 36  | 30       | M      |
++-----+----------+--------+
+```
+
+## Example 14: Collect all values in a field using LIST
+
+The example shows how to collect all firstname values, preserving duplicates and order.
+
+```ppl
+source=accounts
+| stats list(firstname)
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------+
+| list(firstname)             |
+|-----------------------------|
+| [Amber,Hattie,Nanette,Dale] |
++-----------------------------+
+```
+
+## Example 15: Ignore null bucket
+
+This example sets `bucket_nullable=false`, so the account with a null email is excluded from the grouping instead of forming a null bucket.
+
+```ppl
+source=accounts
+| stats bucket_nullable=false count() as cnt by email
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++-----+-----------------------+
+| cnt | email                 |
+|-----+-----------------------|
+| 1   | amberduke@pyrami.com  |
+| 1   | daleadams@boink.com   |
+| 1   | hattiebond@netagy.com |
++-----+-----------------------+
+```
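+
+For contrast, a sketch with `bucket_nullable=true` (the legacy default), which would keep the account with a null email as its own bucket and return a fourth row:
+
+```ppl ignore
+source=accounts
+| stats bucket_nullable=true count() as cnt by email
+```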
+
+## Example 16: Collect unique values in a field using VALUES
+
+The example shows how to collect all unique firstname values, sorted lexicographically with duplicates removed.
+
+```ppl
+source=accounts
+| stats values(firstname)
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------+
+| values(firstname)           |
+|-----------------------------|
+| [Amber,Dale,Hattie,Nanette] |
++-----------------------------+
+```
+
+## Example 17: Span on a date/time field always ignores the null bucket
+
+Index example data:
+
+| Name  | DEPTNO | birthday   |
+|-------|--------|------------|
+| Alice | 1      | 2024-04-21 |
+| Bob   | 2      | 2025-08-21 |
+| Jeff  | null   | 2025-04-22 |
+| Adam  | 2      | null       |
+
+```ppl ignore
+source=example
+| stats count() as cnt by span(birthday, 1y) as year
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++-----+------------+
+| cnt | year       |
+|-----+------------|
+| 1   | 2024-01-01 |
+| 2   | 2025-01-01 |
++-----+------------+
+```
+
+```ppl ignore
+source=example
+| stats count() as cnt by span(birthday, 1y) as year, DEPTNO
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++-----+------------+--------+
+| cnt | year       | DEPTNO |
+|-----+------------+--------|
+| 1   | 2024-01-01 | 1      |
+| 1   | 2025-01-01 | 2      |
+| 1   | 2025-01-01 | null   |
++-----+------------+--------+
+```
+
+```ppl ignore
+source=example
+| stats bucket_nullable=false count() as cnt by span(birthday, 1y) as year, DEPTNO
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++-----+------------+--------+
+| cnt | year       | DEPTNO |
+|-----+------------+--------|
+| 1   | 2024-01-01 | 1      |
+| 1   | 2025-01-01 | 2      |
++-----+------------+--------+
+```
+
+## Example 18: Calculate the count by the implicit @timestamp field
+
+This example demonstrates that if you omit the field parameter in the span function, it will automatically use the implicit `@timestamp` field.
+
+```ppl ignore
+source=big5
+| stats count() by span(1month)
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------+---------------------+
+| count() | span(1month)        |
+|---------+---------------------|
+| 1       | 2023-01-01 00:00:00 |
++---------+---------------------+
+```
+ 
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst
deleted file mode 100644
index cae65c84c79..00000000000
--- a/docs/user/ppl/cmd/stats.rst
+++ /dev/null
@@ -1,409 +0,0 @@
-=====
-stats
-=====
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-===========
-| The ``stats`` command calculates the aggregation from the search result.
-
-
-
-Syntax
-======
-stats [bucket_nullable=bool] ... [by-clause]
-
-* aggregation: mandatory. An aggregation function.
-* bucket_nullable: optional. Controls whether the stats command includes null buckets in group-by aggregations. When set to ``false``, the aggregation ignores records where the group-by field is null, resulting in faster performance by excluding null bucket. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``.
-
-  * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true``
-  * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false``
-
-* by-clause: optional. Groups results by specified fields or expressions. Syntax: by [span-expression,] [field,]... **Default:** If no by-clause is specified, the stats command returns only one row, which is the aggregation over the entire result set.
-* span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). The unit of the interval expression is the natural unit by default. If the field is a date/time type field, the aggregation results always ignore null bucket. For example, ``span(age, 10)`` creates 10-year age buckets, ``span(timestamp, 1h)`` creates hourly buckets. - - * Available time units - - * millisecond (ms) - * second (s) - * minute (m, case sensitive) - * hour (h) - * day (d) - * week (w) - * month (M, case sensitive) - * quarter (q) - * year (y) - -Aggregation Functions -===================== - -The stats command supports the following aggregation functions: - -* COUNT/C: Count of values -* SUM: Sum of numeric values -* AVG: Average of numeric values -* MAX: Maximum value -* MIN: Minimum value -* VAR_SAMP: Sample variance -* VAR_POP: Population variance -* STDDEV_SAMP: Sample standard deviation -* STDDEV_POP: Population standard deviation -* DISTINCT_COUNT_APPROX: Approximate distinct count -* TAKE: List of original values -* PERCENTILE/PERCENTILE_APPROX: Percentile calculations -* PERC/P: Percentile shortcut functions -* MEDIAN: 50th percentile -* EARLIEST: Earliest value by timestamp -* LATEST: Latest value by timestamp -* FIRST: First non-null value -* LAST: Last non-null value -* LIST: Collect all values into array -* VALUES: Collect unique values into sorted array - -For detailed documentation of each function, see `Aggregation Functions <../functions/aggregations.rst>`_. - -Example 1: Calculate the count of events -======================================== - -This example shows calculating the count of events in the accounts. - -PPL query:: - - os> source=accounts | stats count(); - fetched rows / total rows = 1/1 - +---------+ - | count() | - |---------| - | 4 | - +---------+ - - -Example 2: Calculate the average of a field -=========================================== - -This example shows calculating the average age of all the accounts. - -PPL query:: - - os> source=accounts | stats avg(age); - fetched rows / total rows = 1/1 - +----------+ - | avg(age) | - |----------| - | 32.25 | - +----------+ - - -Example 3: Calculate the average of a field by group -==================================================== - -This example shows calculating the average age of all the accounts group by gender. - -PPL query:: - - os> source=accounts | stats avg(age) by gender; - fetched rows / total rows = 2/2 - +--------------------+--------+ - | avg(age) | gender | - |--------------------+--------| - | 28.0 | F | - | 33.666666666666664 | M | - +--------------------+--------+ - - -Example 4: Calculate the average, sum and count of a field by group -=================================================================== - -This example shows calculating the average age, sum age and count of events of all the accounts group by gender. - -PPL query:: - - os> source=accounts | stats avg(age), sum(age), count() by gender; - fetched rows / total rows = 2/2 - +--------------------+----------+---------+--------+ - | avg(age) | sum(age) | count() | gender | - |--------------------+----------+---------+--------| - | 28.0 | 28 | 1 | F | - | 33.666666666666664 | 101 | 3 | M | - +--------------------+----------+---------+--------+ - -Example 5: Calculate the maximum of a field -=========================================== - -The example calculates the max age of all the accounts. 
- -PPL query:: - - os> source=accounts | stats max(age); - fetched rows / total rows = 1/1 - +----------+ - | max(age) | - |----------| - | 36 | - +----------+ - -Example 6: Calculate the maximum and minimum of a field by group -================================================================ - -The example calculates the max and min age values of all the accounts group by gender. - -PPL query:: - - os> source=accounts | stats max(age), min(age) by gender; - fetched rows / total rows = 2/2 - +----------+----------+--------+ - | max(age) | min(age) | gender | - |----------+----------+--------| - | 28 | 28 | F | - | 36 | 32 | M | - +----------+----------+--------+ - -Example 7: Calculate the distinct count of a field -================================================== - -To get the count of distinct values of a field, you can use ``DISTINCT_COUNT`` (or ``DC``) function instead of ``COUNT``. The example calculates both the count and the distinct count of gender field of all the accounts. - -PPL query:: - - os> source=accounts | stats count(gender), distinct_count(gender); - fetched rows / total rows = 1/1 - +---------------+------------------------+ - | count(gender) | distinct_count(gender) | - |---------------+------------------------| - | 4 | 2 | - +---------------+------------------------+ - -Example 8: Calculate the count by a span -======================================== - -The example gets the count of age by the interval of 10 years. - -PPL query:: - - os> source=accounts | stats count(age) by span(age, 10) as age_span - fetched rows / total rows = 2/2 - +------------+----------+ - | count(age) | age_span | - |------------+----------| - | 1 | 20 | - | 3 | 30 | - +------------+----------+ - -Example 9: Calculate the count by a gender and span -=================================================== - -The example gets the count of age by the interval of 10 years and group by gender. - -PPL query:: - - os> source=accounts | stats count() as cnt by span(age, 5) as age_span, gender - fetched rows / total rows = 3/3 - +-----+----------+--------+ - | cnt | age_span | gender | - |-----+----------+--------| - | 1 | 25 | F | - | 2 | 30 | M | - | 1 | 35 | M | - +-----+----------+--------+ - -Span will always be the first grouping key whatever order you specify. - -PPL query:: - - os> source=accounts | stats count() as cnt by gender, span(age, 5) as age_span - fetched rows / total rows = 3/3 - +-----+----------+--------+ - | cnt | age_span | gender | - |-----+----------+--------| - | 1 | 25 | F | - | 2 | 30 | M | - | 1 | 35 | M | - +-----+----------+--------+ - -Example 10: Calculate the count and get email list by a gender and span -======================================================================= - -The example gets the count of age by the interval of 10 years and group by gender, additionally for each row get a list of at most 5 emails. 
- -PPL query:: - - os> source=accounts | stats count() as cnt, take(email, 5) by span(age, 5) as age_span, gender - fetched rows / total rows = 3/3 - +-----+--------------------------------------------+----------+--------+ - | cnt | take(email, 5) | age_span | gender | - |-----+--------------------------------------------+----------+--------| - | 1 | [] | 25 | F | - | 2 | [amberduke@pyrami.com,daleadams@boink.com] | 30 | M | - | 1 | [hattiebond@netagy.com] | 35 | M | - +-----+--------------------------------------------+----------+--------+ - -Example 11: Calculate the percentile of a field -=============================================== - -This example shows calculating the percentile 90th age of all the accounts. - -PPL query:: - - os> source=accounts | stats percentile(age, 90); - fetched rows / total rows = 1/1 - +---------------------+ - | percentile(age, 90) | - |---------------------| - | 36 | - +---------------------+ - - -Example 12: Calculate the percentile of a field by group -======================================================== - -This example shows calculating the percentile 90th age of all the accounts group by gender. - -PPL query:: - - os> source=accounts | stats percentile(age, 90) by gender; - fetched rows / total rows = 2/2 - +---------------------+--------+ - | percentile(age, 90) | gender | - |---------------------+--------| - | 28 | F | - | 36 | M | - +---------------------+--------+ - -Example 13: Calculate the percentile by a gender and span -========================================================= - -The example gets the percentile 90th age by the interval of 10 years and group by gender. - -PPL query:: - - os> source=accounts | stats percentile(age, 90) as p90 by span(age, 10) as age_span, gender - fetched rows / total rows = 2/2 - +-----+----------+--------+ - | p90 | age_span | gender | - |-----+----------+--------| - | 28 | 20 | F | - | 36 | 30 | M | - +-----+----------+--------+ - -Example 14: Collect all values in a field using LIST -==================================================== - -The example shows how to collect all firstname values, preserving duplicates and order. - -PPL query:: - - PPL> source=accounts | stats list(firstname); - fetched rows / total rows = 1/1 - +-------------------------------------+ - | list(firstname) | - |-------------------------------------| - | ["Amber","Hattie","Nanette","Dale"] | - +-------------------------------------+ - - -Example 15: Ignore null bucket -============================== - - -PPL query:: - - PPL> source=accounts | stats bucket_nullable=false count() as cnt by email; - fetched rows / total rows = 3/3 - +-----+-----------------------+ - | cnt | email | - |-----+-----------------------| - | 1 | amberduke@pyrami.com | - | 1 | daleadams@boink.com | - | 1 | hattiebond@netagy.com | - +-----+-----------------------+ - -Example 16: Collect unique values in a field using VALUES -========================================================== - -The example shows how to collect all unique firstname values, sorted lexicographically with duplicates removed. 
- -PPL query:: - - PPL> source=accounts | stats values(firstname); - fetched rows / total rows = 1/1 - +-------------------------------------+ - | values(firstname) | - |-------------------------------------| - | ["Amber","Dale","Hattie","Nanette"] | - +-------------------------------------+ - - -Example 17: Span on date/time field always ignore null bucket -============================================================= - -Index example data: - -+-------+--------+------------+ -| Name | DEPTNO | birthday | -+=======+========+============+ -| Alice | 1 | 2024-04-21 | -+-------+--------+------------+ -| Bob | 2 | 2025-08-21 | -+-------+--------+------------+ -| Jeff | null | 2025-04-22 | -+-------+--------+------------+ -| Adam | 2 | null | -+-------+--------+------------+ - -PPL query:: - - PPL> source=example | stats count() as cnt by span(birthday, 1y) as year; - fetched rows / total rows = 3/3 - +-----+------------+ - | cnt | year | - |-----+------------| - | 1 | 2024-01-01 | - | 2 | 2025-01-01 | - +-----+------------+ - - -PPL query:: - - PPL> source=example | stats count() as cnt by span(birthday, 1y) as year, DEPTNO; - fetched rows / total rows = 3/3 - +-----+------------+--------+ - | cnt | year | DEPTNO | - |-----+------------+--------| - | 1 | 2024-01-01 | 1 | - | 1 | 2025-01-01 | 2 | - | 1 | 2025-01-01 | null | - +-----+------------+--------+ - - -PPL query:: - - PPL> source=example | stats bucket_nullable=false count() as cnt by span(birthday, 1y) as year, DEPTNO; - fetched rows / total rows = 3/3 - +-----+------------+--------+ - | cnt | year | DEPTNO | - |-----+------------+--------| - | 1 | 2024-01-01 | 1 | - | 1 | 2025-01-01 | 2 | - +-----+------------+--------+ - - -Example 18: Calculate the count by the implicit @timestamp field -================================================================ - -This example demonstrates that if you omit the field parameter in the span function, it will automatically use the implicit ``@timestamp`` field. - -PPL query:: - - PPL> source=big5 | stats count() by span(1month) - fetched rows / total rows = 1/1 - +---------+---------------------+ - | count() | span(1month) | - |---------+---------------------| - | 1 | 2023-01-01 00:00:00 | - +---------+---------------------+ diff --git a/docs/user/ppl/cmd/streamstats.md b/docs/user/ppl/cmd/streamstats.md new file mode 100644 index 00000000000..c7f79b21339 --- /dev/null +++ b/docs/user/ppl/cmd/streamstats.md @@ -0,0 +1,281 @@ +# streamstats + +## Description + +The `streamstats` command is used to calculate cumulative or rolling statistics as events are processed in order. Unlike `stats` or `eventstats` which operate on the entire dataset at once, it computes values incrementally on a per-event basis, often respecting the order of events in the search results. It allows you to generate running totals, moving averages, and other statistics that evolve with the stream of events. +Key aspects of `streamstats`: +1. It computes statistics incrementally as each event is processed, making it suitable for time-series and sequence-based analysis. +2. Supports arguments such as window (for sliding window calculations) and current (to control whether the current event included in calculation). +3. Retains all original events and appends new fields containing the calculated statistics. +4. Particularly useful for calculating running totals, identifying trends, or detecting changes over sequences of events. 
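+
+For instance, a minimal sketch (output not shown), assuming the `accounts` index used in the examples below, that chains a cumulative total with a 3-event sliding total:
+
+```ppl ignore
+source=accounts
+| streamstats sum(balance) as running_total
+| streamstats window=3 sum(balance) as last3_total
+| fields account_number, balance, running_total, last3_total
+```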
+
+Difference between `stats`, `eventstats` and `streamstats`
+All of these commands can be used to generate aggregations such as average, sum, and maximum, but they have some key differences in how they operate and what they produce, as shown in the sketch after this list:
+* Transformation Behavior
+    * `stats`: Transforms all events into an aggregated result table, losing original event structure.
+    * `eventstats`: Adds aggregation results as new fields to the original events without removing the event structure.
+    * `streamstats`: Adds cumulative (running) aggregation results to each event as they stream through the pipeline.
+* Output Format
+    * `stats`: Output contains only aggregated values. Original raw events are not preserved.
+    * `eventstats`: Original events remain, with extra fields containing summary statistics.
+    * `streamstats`: Original events remain, with extra fields containing running totals or cumulative statistics.
+* Aggregation Scope
+    * `stats`: Based on all events in the search (or groups defined by BY clause).
+    * `eventstats`: Based on all relevant events, then the result is added back to each event in the group.
+    * `streamstats`: Calculations occur progressively as each event is processed; can be scoped by window.
+* Use Cases
+    * `stats`: When only aggregated results are needed (e.g., counts, averages, sums).
+    * `eventstats`: When aggregated statistics are needed alongside original event data.
+    * `streamstats`: When a running total or cumulative statistic is needed across event streams.
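+
+As a hedged illustration, the following sketch runs the same average over the accounts data with each of the three commands; only the `stats` variant collapses the events into one row per gender, while the other two keep every event and append a field:
+
+```
+source = accounts | stats avg(age) by gender
+source = accounts | eventstats avg(age) as avg_age by gender
+source = accounts | streamstats avg(age) as running_avg by gender
+```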
+
+## Syntax
+
+streamstats [bucket_nullable=bool] [current=\<boolean\>] [window=\<number\>] [global=\<boolean\>] [reset_before="("\<eval-expression\>")"] [reset_after="("\<eval-expression\>")"] \<aggregation\>... [by-clause]
+* function: mandatory. An aggregation function or window function.
+* bucket_nullable: optional. Controls whether the streamstats command considers null buckets as a valid group in group-by aggregations. When set to `false`, it will not treat null group-by values as a distinct group during aggregation. **Default:** Determined by `plugins.ppl.syntax.legacy.preferred`.
+    * When `plugins.ppl.syntax.legacy.preferred=true`, `bucket_nullable` defaults to `true`
+    * When `plugins.ppl.syntax.legacy.preferred=false`, `bucket_nullable` defaults to `false`
+* current: optional. If true, the search includes the given, or current, event in the summary calculations. If false, the search uses the field value from the previous event. Syntax: current=\<boolean\>. **Default:** true.
+* window: optional. Specifies the number of events to use when computing the statistics. Syntax: window=\<number\>. **Default:** 0, which means that all previous and current events are used.
+* global: optional. Used only when the window argument is set. Defines whether to use a single window, global=true, or to use separate windows based on the by clause. If global=false and window is set to a non-zero value, a separate window is used for each group of values of the field specified in the by clause. Syntax: global=\<boolean\>. **Default:** true.
+* reset_before: optional. Before streamstats calculates for an event, reset_before resets all accumulated statistics when the eval-expression evaluates to true. If used with window, the window is also reset. Syntax: reset_before="("\<eval-expression\>")". **Default:** false.
+* reset_after: optional. After streamstats calculations for an event, reset_after resets all accumulated statistics when the eval-expression evaluates to true. This expression can reference fields returned by streamstats. If used with window, the window is also reset. Syntax: reset_after="("\<eval-expression\>")". **Default:** false.
+* by-clause: optional. The by clause could be the fields and expressions like scalar functions and aggregation functions. Besides, the span clause can be used to split a specific field into buckets of the same interval; the aggregation is then computed within these span buckets. Syntax: by [span-expression,] [field,]... **Default:** If no \<by-clause\> is specified, all events are processed as a single group and running statistics are computed across the entire event stream.
+* span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). For example, `span(age, 10)` creates 10-year age buckets, `span(timestamp, 1h)` creates hourly buckets.
+    * Available time units
+        * millisecond (ms)
+        * second (s)
+        * minute (m, case sensitive)
+        * hour (h)
+        * day (d)
+        * week (w)
+        * month (M, case sensitive)
+        * quarter (q)
+        * year (y)
+
+## Aggregation Functions
+
+The streamstats command supports the following aggregation functions:
+* COUNT: Count of values
+* SUM: Sum of numeric values
+* AVG: Average of numeric values
+* MAX: Maximum value
+* MIN: Minimum value
+* VAR_SAMP: Sample variance
+* VAR_POP: Population variance
+* STDDEV_SAMP: Sample standard deviation
+* STDDEV_POP: Population standard deviation
+* DISTINCT_COUNT/DC: Distinct count of values
+* EARLIEST: Earliest value by timestamp
+* LATEST: Latest value by timestamp
+
+For detailed documentation of each function, see [Aggregation Functions](../functions/aggregations.md).
+## Usage
+
+Sample `streamstats` queries:
+
+```
+source = table | streamstats avg(a)
+source = table | streamstats current = false avg(a)
+source = table | streamstats window = 5 sum(b)
+source = table | streamstats current = false window = 2 max(a)
+source = table | where a < 50 | streamstats count(c)
+source = table | streamstats min(c), max(c) by b
+source = table | streamstats count(c) as count_by by b | where count_by > 1000
+source = table | streamstats dc(field) as distinct_count
+source = table | streamstats distinct_count(category) by region
+source = table | streamstats current=false window=2 global=false avg(a) by b
+source = table | streamstats window=2 reset_before=a>31 avg(b)
+source = table | streamstats current=false reset_after=a>31 avg(b) by c
+```
+
+## Example 1: Calculate the running average, sum, and count of a field by group
+
+This example calculates the running average age, running sum of age, and running count of events for all the accounts, grouped by gender.
+
+```ppl
+source=accounts
+| streamstats avg(age) as running_avg, sum(age) as running_sum, count() as running_count by gender
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+--------------------+-------------+---------------+
+| account_number | firstname | address              | balance | gender | city   | employer | state | age | email                 | lastname | running_avg        | running_sum | running_count |
+|----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+--------------------+-------------+---------------|
+| 1              | Amber     | 880 Holmes Lane      | 39225   | M      | Brogan | Pyrami   | IL    | 32  | amberduke@pyrami.com  | Duke     | 32.0               | 32          | 1             |
+| 6              | Hattie    | 671 Bristol Street   | 5686    | M      | Dante  | Netagy   | TN    | 36  | hattiebond@netagy.com | Bond     | 34.0               | 68          | 2             |
+| 13             | Nanette   | 789 Madison Street   | 32838   | F      | Nogal  | Quility  | VA    | 28  | null                  | Bates    | 28.0               | 28          | 1             |
+| 18             | Dale      | 467 Hutchinson Court | 4180    | M      | Orick  | null     | MD    | 33  | daleadams@boink.com   | Adams    | 33.666666666666664 | 101         | 3             |
++----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+--------------------+-------------+---------------+
+```
+
+## Example 2: Running maximum age over a 2-row window
+
+This example calculates the running maximum age over a 2-row window, excluding the current event.
+
+```ppl
+source=state_country
+| streamstats current=false window=2 max(age) as prev_max_age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 8/8
++-------+---------+------------+-------+------+-----+--------------+
+| name  | country | state      | month | year | age | prev_max_age |
+|-------+---------+------------+-------+------+-----+--------------|
+| Jake  | USA     | California | 4     | 2023 | 70  | null         |
+| Hello | USA     | New York   | 4     | 2023 | 30  | 70           |
+| John  | Canada  | Ontario    | 4     | 2023 | 25  | 70           |
+| Jane  | Canada  | Quebec     | 4     | 2023 | 20  | 30           |
+| Jim   | Canada  | B.C        | 4     | 2023 | 27  | 25           |
+| Peter | Canada  | B.C        | 4     | 2023 | 57  | 27           |
+| Rick  | Canada  | B.C        | 4     | 2023 | 70  | 57           |
+| David | USA     | Washington | 4     | 2023 | 40  | 70           |
++-------+---------+------------+-------+------+-----+--------------+
+```
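+
+For contrast, a sketch (output not verified) of the same 2-row window with the default `current=true`, which includes each event's own age in the maximum, so the first row would be 70 rather than null:
+
+```ppl ignore
+source=state_country
+| streamstats window=2 max(age) as max_last2
+```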
+
+## Example 3: Use the global argument to calculate running statistics
+
+The global argument is only applicable when a window argument is set. It defines how the window is applied in relation to the grouping fields:
+* global=true: a global window is applied across all rows, but the calculations inside the window still respect the by groups.
+* global=false: the window itself is created per group, meaning each group gets its own independent window.
+
+This example shows how to calculate the running average of age across accounts by country, using the global argument.
+
+The original data:
+
+```text
++-------+---------+------------+-------+------+-----+
+| name  | country | state      | month | year | age |
+|-------+---------+------------+-------+------+-----|
+| Jake  | USA     | California | 4     | 2023 | 70  |
+| Hello | USA     | New York   | 4     | 2023 | 30  |
+| John  | Canada  | Ontario    | 4     | 2023 | 25  |
+| Jane  | Canada  | Quebec     | 4     | 2023 | 20  |
+| Jim   | Canada  | B.C        | 4     | 2023 | 27  |
+| Peter | Canada  | B.C        | 4     | 2023 | 57  |
+| Rick  | Canada  | B.C        | 4     | 2023 | 70  |
+| David | USA     | Washington | 4     | 2023 | 40  |
++-------+---------+------------+-------+------+-----+
+```
+
+* global=true: The window slides across all rows globally (following their input order), but inside each window, aggregation is still computed by country. So we process the data stream row by row to build the sliding window with size 2. We can see that David and Rick are in a window.
+* global=false: Each by group (country) forms its own independent stream and window (size 2). So David and Hello are in one window for USA. This time we get running_avg 35 for David, rather than 40 as when global is set to true.
+
+```ppl
+source=state_country
+| streamstats window=2 global=true avg(age) as running_avg by country
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 8/8
++-------+---------+------------+-------+------+-----+-------------+
+| name  | country | state      | month | year | age | running_avg |
+|-------+---------+------------+-------+------+-----+-------------|
+| Jake  | USA     | California | 4     | 2023 | 70  | 70.0        |
+| Hello | USA     | New York   | 4     | 2023 | 30  | 50.0        |
+| John  | Canada  | Ontario    | 4     | 2023 | 25  | 25.0        |
+| Jane  | Canada  | Quebec     | 4     | 2023 | 20  | 22.5        |
+| Jim   | Canada  | B.C        | 4     | 2023 | 27  | 23.5        |
+| Peter | Canada  | B.C        | 4     | 2023 | 57  | 42.0        |
+| Rick  | Canada  | B.C        | 4     | 2023 | 70  | 63.5        |
+| David | USA     | Washington | 4     | 2023 | 40  | 40.0        |
++-------+---------+------------+-------+------+-----+-------------+
+```
+
+```ppl
+source=state_country
+| streamstats window=2 global=false avg(age) as running_avg by country
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 8/8
++-------+---------+------------+-------+------+-----+-------------+
+| name  | country | state      | month | year | age | running_avg |
+|-------+---------+------------+-------+------+-----+-------------|
+| Jake  | USA     | California | 4     | 2023 | 70  | 70.0        |
+| Hello | USA     | New York   | 4     | 2023 | 30  | 50.0        |
+| John  | Canada  | Ontario    | 4     | 2023 | 25  | 25.0        |
+| Jane  | Canada  | Quebec     | 4     | 2023 | 20  | 22.5        |
+| Jim   | Canada  | B.C        | 4     | 2023 | 27  | 23.5        |
+| Peter | Canada  | B.C        | 4     | 2023 | 57  | 42.0        |
+| Rick  | Canada  | B.C        | 4     | 2023 | 70  | 63.5        |
+| David | USA     | Washington | 4     | 2023 | 40  | 35.0        |
++-------+---------+------------+-------+------+-----+-------------+
+```
+
+## Example 4: Use the reset_before and reset_after arguments to reset statistics
+
+This example calculates the running average of age across accounts by country, with resets applied.
+
+```ppl
+source=state_country
+| streamstats current=false reset_before=age>34 reset_after=age<25 avg(age) as avg_age by country
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 8/8
++-------+---------+------------+-------+------+-----+---------+
+| name  | country | state      | month | year | age | avg_age |
+|-------+---------+------------+-------+------+-----+---------|
+| Jake  | USA     | California | 4     | 2023 | 70  | null    |
+| Hello | USA     | New York   | 4     | 2023 | 30  | 70.0    |
+| John  | Canada  | Ontario    | 4     | 2023 | 25  | null    |
+| Jane  | Canada  | Quebec     | 4     | 2023 | 20  | 25.0    |
+| Jim   | Canada  | B.C        | 4     | 2023 | 27  | null    |
+| Peter | Canada  | B.C        | 4     | 2023 | 57  | null    |
+| Rick  | Canada  | B.C        | 4     | 2023 | 70  | null    |
+| David | USA     | Washington | 4     | 2023 | 40  | null    |
++-------+---------+------------+-------+------+-----+---------+
+```
+
+## Example 5: Null buckets handling
+
+This example contrasts `bucket_nullable=false`, where the null employer value does not form a bucket, with `bucket_nullable=true`, where it is counted as its own group.
+
+```ppl
+source=accounts
+| streamstats bucket_nullable=false count() as cnt by employer
+| fields account_number, firstname, employer, cnt
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----------+----------+------+
+| account_number | firstname | employer | cnt  |
+|----------------+-----------+----------+------|
+| 1              | Amber     | Pyrami   | 1    |
+| 6              | Hattie    | Netagy   | 1    |
+| 13             | Nanette   | Quility  | 1    |
+| 18             | Dale      | null     | null |
++----------------+-----------+----------+------+
+```
+
+```ppl
+source=accounts
+| streamstats bucket_nullable=true count() as cnt by employer
+| fields account_number, firstname, employer, cnt
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-----------+----------+-----+
+| account_number | firstname | employer | cnt |
+|----------------+-----------+----------+-----|
+| 1              | Amber     | Pyrami   | 1   |
+| 6              | Hattie    | Netagy   | 1   |
+| 13             | Nanette   | Quility  | 1   |
+| 18             | Dale      | null     | 1   |
++----------------+-----------+----------+-----+
+```
+ 
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/streamstats.rst b/docs/user/ppl/cmd/streamstats.rst
deleted file mode 100644
index e38df779791..00000000000
--- a/docs/user/ppl/cmd/streamstats.rst
+++ /dev/null
@@ -1,273 +0,0 @@
-===========
-streamstats
-===========
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-===========
-The ``streamstats`` command is used to calculate cumulative or rolling statistics as events are processed in order. Unlike ``stats`` or ``eventstats`` which operate on the entire dataset at once, it computes values incrementally on a per-event basis, often respecting the order of events in the search results. It allows you to generate running totals, moving averages, and other statistics that evolve with the stream of events.
-
-Key aspects of `streamstats`:
-
-1. It computes statistics incrementally as each event is processed, making it suitable for time-series and sequence-based analysis.
-2. Supports arguments such as window (for sliding window calculations) and current (to control whether the current event included in calculation).
-3. Retains all original events and appends new fields containing the calculated statistics.
-4. Particularly useful for calculating running totals, identifying trends, or detecting changes over sequences of events. 
- -Difference between ``stats``, ``eventstats`` and ``streamstats`` - -All of these commands can be used to generate aggregations such as average, sum, and maximum, but they have some key differences in how they operate and what they produce: - -* Transformation Behavior - - * ``stats``: Transforms all events into an aggregated result table, losing original event structure. - * ``eventstats``: Adds aggregation results as new fields to the original events without removing the event structure. - * ``streamstats``: Adds cumulative (running) aggregation results to each event as they stream through the pipeline. - -* Output Format - - * ``stats``: Output contains only aggregated values. Original raw events are not preserved. - * ``eventstats``: Original events remain, with extra fields containing summary statistics. - * ``streamstats``: Original events remain, with extra fields containing running totals or cumulative statistics. - -* Aggregation Scope - - * ``stats``: Based on all events in the search (or groups defined by BY clause). - * ``eventstats``: Based on all relevant events, then the result is added back to each event in the group. - * ``streamstats``: Calculations occur progressively as each event is processed; can be scoped by window. - -* Use Cases - - * ``stats``: When only aggregated results are needed (e.g., counts, averages, sums). - * ``eventstats``: When aggregated statistics are needed alongside original event data. - * ``streamstats``: When a running total or cumulative statistic is needed across event streams. - -Syntax -====== -streamstats [bucket_nullable=bool] [current=] [window=] [global=] [reset_before="("")"] [reset_after="("")"] ... [by-clause] - -* function: mandatory. A aggregation function or window function. -* bucket_nullable: optional. Controls whether the streamstats command consider null buckets as a valid group in group-by aggregations. When set to ``false``, it will not treat null group-by values as a distinct group during aggregation. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. - - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``bucket_nullable`` defaults to ``true`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``bucket_nullable`` defaults to ``false`` - -* current: optional. If true, the search includes the given, or current, event in the summary calculations. If false, the search uses the field value from the previous event. Syntax: current=. **Default:** true. -* window: optional. Specifies the number of events to use when computing the statistics. Syntax: window=. **Default:** 0, which means that all previous and current events are used. -* global: optional. Used only when the window argument is set. Defines whether to use a single window, global=true, or to use separate windows based on the by clause. If global=false and window is set to a non-zero value, a separate window is used for each group of values of the field specified in the by clause. Syntax: global=. **Default:** true. -* reset_before: optional. Before streamstats calculates for an event, reset_before resets all accumulated statistics when the eval-expression evaluates to true. If used with window, the window is also reset. Syntax: reset_before="("")". **Default:** false. -* reset_after: optional. After streamstats calculations for an event, reset_after resets all accumulated statistics when the eval-expression evaluates to true. This expression can reference fields returned by streamstats. If used with window, the window is also reset. 
Syntax: reset_after="("")". **Default:** false. -* by-clause: optional. The by clause could be the fields and expressions like scalar functions and aggregation functions. Besides, the span clause can be used to split specific field into buckets in the same interval, the stats then does the aggregation by these span buckets. Syntax: by [span-expression,] [field,]... **Default:** If no is specified, all events are processed as a single group and running statistics are computed across the entire event stream. -* span-expression: optional, at most one. Splits field into buckets by intervals. Syntax: span(field_expr, interval_expr). For example, ``span(age, 10)`` creates 10-year age buckets, ``span(timestamp, 1h)`` creates hourly buckets. - - * Available time units - - * millisecond (ms) - * second (s) - * minute (m, case sensitive) - * hour (h) - * day (d) - * week (w) - * month (M, case sensitive) - * quarter (q) - * year (y) - -Aggregation Functions -===================== - -The streamstats command supports the following aggregation functions: - -* COUNT: Count of values -* SUM: Sum of numeric values -* AVG: Average of numeric values -* MAX: Maximum value -* MIN: Minimum value -* VAR_SAMP: Sample variance -* VAR_POP: Population variance -* STDDEV_SAMP: Sample standard deviation -* STDDEV_POP: Population standard deviation -* DISTINCT_COUNT/DC: Distinct count of values -* EARLIEST: Earliest value by timestamp -* LATEST: Latest value by timestamp - -For detailed documentation of each function, see `Aggregation Functions <../functions/aggregations.rst>`_. - -Usage -===== - -Streamstats:: - - source = table | streamstats avg(a) - source = table | streamstats current = false avg(a) - source = table | streamstats window = 5 sum(b) - source = table | streamstats current = false window = 2 max(a) - source = table | where a < 50 | streamstats count(c) - source = table | streamstats min(c), max(c) by b - source = table | streamstats count(c) as count_by by b | where count_by > 1000 - source = table | streamstats dc(field) as distinct_count - source = table | streamstats distinct_count(category) by region - source = table | streamstats current=false window=2 global=false avg(a) by b - source = table | streamstats window=2 reset_before=a>31 avg(b) - source = table | streamstats current=false reset_after=a>31 avg(b) by c - - -Example 1: Calculate the running average, sum, and count of a field by group -============================================================================ - -This example calculates the running average age, running sum of age, and running count of events for all the accounts, grouped by gender. 
- -PPL query:: - - os> source=accounts | streamstats avg(age) as running_avg, sum(age) as running_sum, count() as running_count by gender; - fetched rows / total rows = 4/4 - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+--------------------+-------------+---------------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | running_avg | running_sum | running_count | - |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+--------------------+-------------+---------------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | 32.0 | 32 | 1 | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | 34.0 | 68 | 2 | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | 28.0 | 28 | 1 | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | 33.666666666666664 | 101 | 3 | - +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+--------------------+-------------+---------------+ - - -Example 2: Running maximum age over a 2-row window -================================================== - -This example calculates the running maximum age over a 2-row window, excluding the current event. - -PPL query:: - - os> source=state_country | streamstats current=false window=2 max(age) as prev_max_age - fetched rows / total rows = 8/8 - +-------+---------+------------+-------+------+-----+--------------+ - | name | country | state | month | year | age | prev_max_age | - |-------+---------+------------+-------+------+-----+--------------| - | Jake | USA | California | 4 | 2023 | 70 | null | - | Hello | USA | New York | 4 | 2023 | 30 | 70 | - | John | Canada | Ontario | 4 | 2023 | 25 | 70 | - | Jane | Canada | Quebec | 4 | 2023 | 20 | 30 | - | Jim | Canada | B.C | 4 | 2023 | 27 | 25 | - | Peter | Canada | B.C | 4 | 2023 | 57 | 27 | - | Rick | Canada | B.C | 4 | 2023 | 70 | 57 | - | David | USA | Washington | 4 | 2023 | 40 | 70 | - +-------+---------+------------+-------+------+-----+--------------+ - - -Example 3: Use the global argument to calculate running statistics -================================================================== - -The global argument is only applicable when a window argument is set. It defines how the window is applied in relation to the grouping fields: - -* global=true: a global window is applied across all rows, but the calculations inside the window still respect the by groups. -* global=false: the window itself is created per group, meaning each group gets its own independent window. - -This example shows how to calculate the running average of age across accounts by country, using global argument. 
- -original data:: - - +-------+---------+------------+-------+------+-----+ - | name | country | state | month | year | age | - |-------+---------+------------+-------+------+-----+ - | Jake | USA | California | 4 | 2023 | 70 | - | Hello | USA | New York | 4 | 2023 | 30 | - | John | Canada | Ontario | 4 | 2023 | 25 | - | Jane | Canada | Quebec | 4 | 2023 | 20 | - | Jim | Canada | B.C | 4 | 2023 | 27 | - | Peter | Canada | B.C | 4 | 2023 | 57 | - | Rick | Canada | B.C | 4 | 2023 | 70 | - | David | USA | Washington | 4 | 2023 | 40 | - +-------+---------+------------+-------+------+-----+ - -* global=true: The window slides across all rows globally (following their input order), but inside each window, aggregation is still computed by country. So we process the data stream row by row to build the sliding window with size 2. We can see that David and Rick are in a window. -* global=false: Each by group (country) forms its own independent stream and window (size 2). So David and Hello are in one window for USA. This time we get running_avg 35 for David, rather than 40 when global is set true. - -PPL query:: - - os> source=state_country | streamstats window=2 global=true avg(age) as running_avg by country ; - fetched rows / total rows = 8/8 - +-------+---------+------------+-------+------+-----+-------------+ - | name | country | state | month | year | age | running_avg | - |-------+---------+------------+-------+------+-----+-------------| - | Jake | USA | California | 4 | 2023 | 70 | 70.0 | - | Hello | USA | New York | 4 | 2023 | 30 | 50.0 | - | John | Canada | Ontario | 4 | 2023 | 25 | 25.0 | - | Jane | Canada | Quebec | 4 | 2023 | 20 | 22.5 | - | Jim | Canada | B.C | 4 | 2023 | 27 | 23.5 | - | Peter | Canada | B.C | 4 | 2023 | 57 | 42.0 | - | Rick | Canada | B.C | 4 | 2023 | 70 | 63.5 | - | David | USA | Washington | 4 | 2023 | 40 | 40.0 | - +-------+---------+------------+-------+------+-----+-------------+ - - os> source=state_country | streamstats window=2 global=false avg(age) as running_avg by country ; - fetched rows / total rows = 8/8 - +-------+---------+------------+-------+------+-----+-------------+ - | name | country | state | month | year | age | running_avg | - |-------+---------+------------+-------+------+-----+-------------| - | Jake | USA | California | 4 | 2023 | 70 | 70.0 | - | Hello | USA | New York | 4 | 2023 | 30 | 50.0 | - | John | Canada | Ontario | 4 | 2023 | 25 | 25.0 | - | Jane | Canada | Quebec | 4 | 2023 | 20 | 22.5 | - | Jim | Canada | B.C | 4 | 2023 | 27 | 23.5 | - | Peter | Canada | B.C | 4 | 2023 | 57 | 42.0 | - | Rick | Canada | B.C | 4 | 2023 | 70 | 63.5 | - | David | USA | Washington | 4 | 2023 | 40 | 35.0 | - +-------+---------+------------+-------+------+-----+-------------+ - - -Example 4: Use the reset_before and reset_after arguments to reset statistics -============================================================================= - -This example calculates the running average of age across accounts by country, with resets applied. 
-
-PPL query::
-
-    os> source=state_country | streamstats current=false reset_before=age>34 reset_after=age<25 avg(age) as avg_age by country;
-    fetched rows / total rows = 8/8
-    +-------+---------+------------+-------+------+-----+---------+
-    | name  | country | state      | month | year | age | avg_age |
-    |-------+---------+------------+-------+------+-----+---------|
-    | Jake  | USA     | California | 4     | 2023 | 70  | null    |
-    | Hello | USA     | New York   | 4     | 2023 | 30  | 70.0    |
-    | John  | Canada  | Ontario    | 4     | 2023 | 25  | null    |
-    | Jane  | Canada  | Quebec     | 4     | 2023 | 20  | 25.0    |
-    | Jim   | Canada  | B.C        | 4     | 2023 | 27  | null    |
-    | Peter | Canada  | B.C        | 4     | 2023 | 57  | null    |
-    | Rick  | Canada  | B.C        | 4     | 2023 | 70  | null    |
-    | David | USA     | Washington | 4     | 2023 | 40  | null    |
-    +-------+---------+------------+-------+------+-----+---------+
-
-
-Example 5: Null buckets handling
-================================
-
-PPL query::
-
-    os> source=accounts | streamstats bucket_nullable=false count() as cnt by employer | fields account_number, firstname, employer, cnt;
-    fetched rows / total rows = 4/4
-    +----------------+-----------+----------+------+
-    | account_number | firstname | employer | cnt  |
-    |----------------+-----------+----------+------|
-    | 1              | Amber     | Pyrami   | 1    |
-    | 6              | Hattie    | Netagy   | 1    |
-    | 13             | Nanette   | Quility  | 1    |
-    | 18             | Dale      | null     | null |
-    +----------------+-----------+----------+------+
-
-PPL query::
-
-    os> source=accounts | streamstats bucket_nullable=true count() as cnt by employer | fields account_number, firstname, employer, cnt;
-    fetched rows / total rows = 4/4
-    +----------------+-----------+----------+-----+
-    | account_number | firstname | employer | cnt |
-    |----------------+-----------+----------+-----|
-    | 1              | Amber     | Pyrami   | 1   |
-    | 6              | Hattie    | Netagy   | 1   |
-    | 13             | Nanette   | Quility  | 1   |
-    | 18             | Dale      | null     | 1   |
-    +----------------+-----------+----------+-----+
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/subquery.md b/docs/user/ppl/cmd/subquery.md
new file mode 100644
index 00000000000..aa33fbbb119
--- /dev/null
+++ b/docs/user/ppl/cmd/subquery.md
@@ -0,0 +1,197 @@
+# subquery
+
+## Description
+
+The `subquery` command allows you to embed one PPL query inside another, enabling complex filtering and data retrieval operations. A subquery is a nested query that executes first and returns results that are used by the outer query for filtering, comparison, or joining operations.
+Subqueries are useful for:
+1. Filtering data based on results from another query
+2. Checking for the existence of related data
+3. Performing calculations that depend on aggregated values from other tables
+4. Creating complex joins with dynamic conditions
+
+## Syntax
+
+subquery: [ source=... \| ... \| ... ]
+
+Subqueries use the same syntax as regular PPL queries but must be enclosed in square brackets. There are four main types of subqueries:
+
+**IN Subquery**
+Tests whether a field value exists in the results of a subquery:
+
+```sql ignore
+where <field> [not] in [ source=... | ... | ... ]
+```
+
+**EXISTS Subquery**
+Tests whether a subquery returns any results:
+
+```sql ignore
+where [not] exists [ source=... | ... | ... ]
+```
+
+**Scalar Subquery**
+Returns a single value that can be used in comparisons or calculations:
+
+```sql ignore
+where <field> = [ source=... | ... | ... ]
+```
+
+**Relation Subquery**
+Used in join operations to provide dynamic right-side data:
+
+```sql ignore
+| join ON condition [ source=... | ... | ... ]
] +``` + +## Configuration + +### plugins.ppl.subsearch.maxout + +The size configures the maximum of rows to return from subsearch. The default value is: `10000`. A value of `0` indicates that the restriction is unlimited. + +Change the subsearch.maxout to unlimited: + +```bash ignore +sh$ curl -sS -H 'Content-Type: application/json' \ +... -X PUT localhost:9200/_plugins/_query/settings \ +... -d '{"persistent" : {"plugins.ppl.subsearch.maxout" : "0"}}' +{ + "acknowledged": true, + "persistent": { + "plugins": { + "ppl": { + "subsearch": { + "maxout": "-1" + } + } + } + }, + "transient": {} +} +``` + +## Usage + +InSubquery: + +``` +source = outer | where a in [ source = inner | fields b ] +source = outer | where (a) in [ source = inner | fields b ] +source = outer | where (a,b,c) in [ source = inner | fields d,e,f ] +source = outer | where a not in [ source = inner | fields b ] +source = outer | where (a) not in [ source = inner | fields b ] +source = outer | where (a,b,c) not in [ source = inner | fields d,e,f ] +source = outer a in [ source = inner | fields b ] // search filtering with subquery +source = outer a not in [ source = inner | fields b ] // search filtering with subquery) +source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ] // nested +source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c //as join filter +``` + +ExistsSubquery: + +``` +// Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table nested +source = outer | where exists [ source = inner | where a = c ] +source = outer | where not exists [ source = inner | where a = c ] +source = outer | where exists [ source = inner | where a = c and b = d ] +source = outer | where not exists [ source = inner | where a = c and b = d ] +source = outer exists [ source = inner | where a = c ] // search filtering with subquery +source = outer not exists [ source = inner | where a = c ] //search filtering with subquery +source = table as t1 exists [ source = table as t2 | where t1.a = t2.a ] //table alias is useful in exists subquery +source = outer | where exists [ source = inner1 | where a = c and exists [ source = nested | where c = e ] ] //nested +source = outer | where exists [ source = inner1 | where a = c | where exists [ source = nested | where c = e ] ] //nested +source = outer | where exists [ source = inner | where c > 10 ] //uncorrelated exists +source = outer | where not exists [ source = inner | where c > 10 ] //uncorrelated exists +source = outer | where exists [ source = inner ] | eval l = "nonEmpty" | fields l //special uncorrelated exists +``` + +ScalarSubquery: + +``` +//Uncorrelated scalar subquery in Select +source = outer | eval m = [ source = inner | stats max(c) ] | fields m, a +source = outer | eval m = [ source = inner | stats max(c) ] + b | fields m, a +//Uncorrelated scalar subquery in Where** +source = outer | where a > [ source = inner | stats min(c) ] | fields a +//Uncorrelated scalar subquery in Search filter +source = outer a > [ source = inner | stats min(c) ] | fields a +//Correlated scalar subquery in Select +source = outer | eval m = [ source = inner | where outer.b = inner.d | stats max(c) ] | fields m, a +source = outer | eval m = [ source = inner | where b = d | stats max(c) ] | fields m, a +source = outer | eval m = [ source = inner | where outer.b > inner.d | stats max(c) ] | fields m, a +//Correlated scalar 
subquery in Where +source = outer | where a = [ source = inner | where outer.b = inner.d | stats max(c) ] +source = outer | where a = [ source = inner | where b = d | stats max(c) ] +source = outer | where [ source = inner | where outer.b = inner.d OR inner.d = 1 | stats count() ] > 0 | fields a +//Correlated scalar subquery in Search filter +source = outer a = [ source = inner | where b = d | stats max(c) ] +source = outer [ source = inner | where outer.b = inner.d OR inner.d = 1 | stats count() ] > 0 | fields a +//Nested scalar subquery +source = outer | where a = [ source = inner | stats max(c) | sort c ] OR b = [ source = inner | where c = 1 | stats min(d) | sort d ] +source = outer | where a = [ source = inner | where c = [ source = nested | stats max(e) by f | sort f ] | stats max(d) by c | sort c | head 1 ] +RelationSubquery +source = table1 | join left = l right = r on condition [ source = table2 | where d > 10 | head 5 ] //subquery in join right side +source = [ source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] | stats count(a) by b ] as outer | head 1 +``` + +## Example 1: TPC-H q20 + +This example shows a complex TPC-H query 20 implementation using nested subqueries. + +```bash ignore +curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "query" : """ + source = supplier + | join ON s_nationkey = n_nationkey nation + | where n_name = 'CANADA' + and s_suppkey in [ + source = partsupp + | where ps_partkey in [ + source = part + | where like(p_name, 'forest%') + | fields p_partkey + ] + and ps_availqty > [ + source = lineitem + | where l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date('1994-01-01') + and l_shipdate < date_add(date('1994-01-01'), interval 1 year) + | stats sum(l_quantity) as sum_l_quantity + | eval half_sum_l_quantity = 0.5 * sum_l_quantity // Stats and Eval commands can combine when issues/819 resolved + | fields half_sum_l_quantity + ] + | fields ps_suppkey + ] + """ +}' +``` + +## Example 2: TPC-H q22 + +This example shows a TPC-H query 22 implementation using EXISTS and scalar subqueries. + +```bash ignore +curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ + "query" : """ + source = [ + source = customer + | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > [ + source = customer + | where c_acctbal > 0.00 + and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') + | stats avg(c_acctbal) + ] + and not exists [ + source = orders + | where o_custkey = c_custkey + ] + | eval cntrycode = substring(c_phone, 1, 2) + | fields cntrycode, c_acctbal + ] as custsale + | stats count() as numcust, sum(c_acctbal) as totacctbal by cntrycode + | sort cntrycode + """ + }' + ``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/subquery.rst b/docs/user/ppl/cmd/subquery.rst deleted file mode 100644 index 48491db22e2..00000000000 --- a/docs/user/ppl/cmd/subquery.rst +++ /dev/null @@ -1,206 +0,0 @@ -======== -subquery -======== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``subquery`` command allows you to embed one PPL query inside another, enabling complex filtering and data retrieval operations. A subquery is a nested query that executes first and returns results that are used by the outer query for filtering, comparison, or joining operations. - -| Subqueries are useful for: - -1. 
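+
+## Example 3: Filter with an IN subquery (illustrative)
+
+The following is a minimal sketch rather than output-verified documentation; it assumes the `accounts` index with `gender` and `age` fields used elsewhere in this guide. The IN subquery keeps only accounts whose gender value also appears among accounts older than 30:
+
+```
+source = accounts | where gender in [ source = accounts | where age > 30 | fields gender ] | fields account_number, gender, age
+```
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/subquery.rst b/docs/user/ppl/cmd/subquery.rst
deleted file mode 100644
index 48491db22e2..00000000000
--- a/docs/user/ppl/cmd/subquery.rst
+++ /dev/null
@@ -1,206 +0,0 @@
-========
-subquery
-========
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-===========
-| The ``subquery`` command allows you to embed one PPL query inside another, enabling complex filtering and data retrieval operations. A subquery is a nested query that executes first and returns results that are used by the outer query for filtering, comparison, or joining operations.
-
-| Subqueries are useful for:
-
-1.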
Filtering data based on results from another query -2. Checking for the existence of related data -3. Performing calculations that depend on aggregated values from other tables -4. Creating complex joins with dynamic conditions - -Syntax -====== -subquery: [ source=... | ... | ... ] - -Subqueries use the same syntax as regular PPL queries but must be enclosed in square brackets. There are four main types of subqueries: - -**IN Subquery** -Tests whether a field value exists in the results of a subquery:: - - where [not] in [ source=... | ... | ... ] - -**EXISTS Subquery** -Tests whether a subquery returns any results:: - - where [not] exists [ source=... | ... | ... ] - -**Scalar Subquery** -Returns a single value that can be used in comparisons or calculations:: - - where = [ source=... | ... | ... ] - -**Relation Subquery** -Used in join operations to provide dynamic right-side data:: - - | join ON condition [ source=... | ... | ... ] - -Configuration -============= - -plugins.ppl.subsearch.maxout ----------------------------- - -The size configures the maximum of rows to return from subsearch. The default value is: ``10000``. A value of ``0`` indicates that the restriction is unlimited. - -Change the subsearch.maxout to unlimited:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X PUT localhost:9200/_plugins/_query/settings \ - ... -d '{"persistent" : {"plugins.ppl.subsearch.maxout" : "0"}}' - { - "acknowledged": true, - "persistent": { - "plugins": { - "ppl": { - "subsearch": { - "maxout": "-1" - } - } - } - }, - "transient": {} - } - -Usage -===== - -InSubquery:: - - source = outer | where a in [ source = inner | fields b ] - source = outer | where (a) in [ source = inner | fields b ] - source = outer | where (a,b,c) in [ source = inner | fields d,e,f ] - source = outer | where a not in [ source = inner | fields b ] - source = outer | where (a) not in [ source = inner | fields b ] - source = outer | where (a,b,c) not in [ source = inner | fields d,e,f ] - source = outer a in [ source = inner | fields b ] // search filtering with subquery - source = outer a not in [ source = inner | fields b ] // search filtering with subquery) - source = outer | where a in [ source = inner1 | where b not in [ source = inner2 | fields c ] | fields b ] // nested - source = table1 | inner join left = l right = r on l.a = r.a AND r.a in [ source = inner | fields d ] | fields l.a, r.a, b, c //as join filter - -ExistsSubquery:: - - // Assumptions: `a`, `b` are fields of table outer, `c`, `d` are fields of table inner, `e`, `f` are fields of table nested - source = outer | where exists [ source = inner | where a = c ] - source = outer | where not exists [ source = inner | where a = c ] - source = outer | where exists [ source = inner | where a = c and b = d ] - source = outer | where not exists [ source = inner | where a = c and b = d ] - source = outer exists [ source = inner | where a = c ] // search filtering with subquery - source = outer not exists [ source = inner | where a = c ] //search filtering with subquery - source = table as t1 exists [ source = table as t2 | where t1.a = t2.a ] //table alias is useful in exists subquery - source = outer | where exists [ source = inner1 | where a = c and exists [ source = nested | where c = e ] ] //nested - source = outer | where exists [ source = inner1 | where a = c | where exists [ source = nested | where c = e ] ] //nested - source = outer | where exists [ source = inner | where c > 10 ] //uncorrelated exists - source = outer | where not exists 
[ source = inner | where c > 10 ] //uncorrelated exists - source = outer | where exists [ source = inner ] | eval l = "nonEmpty" | fields l //special uncorrelated exists - -ScalarSubquery:: - - //Uncorrelated scalar subquery in Select - source = outer | eval m = [ source = inner | stats max(c) ] | fields m, a - source = outer | eval m = [ source = inner | stats max(c) ] + b | fields m, a - - //Uncorrelated scalar subquery in Where** - source = outer | where a > [ source = inner | stats min(c) ] | fields a - - //Uncorrelated scalar subquery in Search filter - source = outer a > [ source = inner | stats min(c) ] | fields a - - //Correlated scalar subquery in Select - source = outer | eval m = [ source = inner | where outer.b = inner.d | stats max(c) ] | fields m, a - source = outer | eval m = [ source = inner | where b = d | stats max(c) ] | fields m, a - source = outer | eval m = [ source = inner | where outer.b > inner.d | stats max(c) ] | fields m, a - - //Correlated scalar subquery in Where - source = outer | where a = [ source = inner | where outer.b = inner.d | stats max(c) ] - source = outer | where a = [ source = inner | where b = d | stats max(c) ] - source = outer | where [ source = inner | where outer.b = inner.d OR inner.d = 1 | stats count() ] > 0 | fields a - - //Correlated scalar subquery in Search filter - source = outer a = [ source = inner | where b = d | stats max(c) ] - source = outer [ source = inner | where outer.b = inner.d OR inner.d = 1 | stats count() ] > 0 | fields a - - //Nested scalar subquery - source = outer | where a = [ source = inner | stats max(c) | sort c ] OR b = [ source = inner | where c = 1 | stats min(d) | sort d ] - source = outer | where a = [ source = inner | where c = [ source = nested | stats max(e) by f | sort f ] | stats max(d) by c | sort c | head 1 ] - -RelationSubquery:: - - source = table1 | join left = l right = r on condition [ source = table2 | where d > 10 | head 5 ] //subquery in join right side - source = [ source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ] | stats count(a) by b ] as outer | head 1 - -Example 1: TPC-H q20 -==================== - -This example shows a complex TPC-H query 20 implementation using nested subqueries. - -PPL query:: - - >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{ - "query" : """ - source = supplier - | join ON s_nationkey = n_nationkey nation - | where n_name = 'CANADA' - and s_suppkey in [ - source = partsupp - | where ps_partkey in [ - source = part - | where like(p_name, 'forest%') - | fields p_partkey - ] - and ps_availqty > [ - source = lineitem - | where l_partkey = ps_partkey - and l_suppkey = ps_suppkey - and l_shipdate >= date('1994-01-01') - and l_shipdate < date_add(date('1994-01-01'), interval 1 year) - | stats sum(l_quantity) as sum_l_quantity - | eval half_sum_l_quantity = 0.5 * sum_l_quantity // Stats and Eval commands can combine when issues/819 resolved - | fields half_sum_l_quantity - ] - | fields ps_suppkey - ] - """ - }' - -Example 2: TPC-H q22 -==================== - -This example shows a TPC-H query 22 implementation using EXISTS and scalar subqueries. 
-
-PPL query::
-
-    >> curl -H 'Content-Type: application/json' -X POST localhost:9200/_plugins/_ppl -d '{
-    "query" : """
-    source = [
-      source = customer
-      | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
-        and c_acctbal > [
-          source = customer
-          | where c_acctbal > 0.00
-            and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
-          | stats avg(c_acctbal)
-        ]
-        and not exists [
-          source = orders
-          | where o_custkey = c_custkey
-        ]
-      | eval cntrycode = substring(c_phone, 1, 2)
-      | fields cntrycode, c_acctbal
-    ] as custsale
-    | stats count() as numcust, sum(c_acctbal) as totacctbal by cntrycode
-    | sort cntrycode
-    """
-    }'
-
diff --git a/docs/user/ppl/cmd/syntax.md b/docs/user/ppl/cmd/syntax.md
new file mode 100644
index 00000000000..32c5ebe89d9
--- /dev/null
+++ b/docs/user/ppl/cmd/syntax.md
@@ -0,0 +1,18 @@
+# Syntax
+
+## Command Order
+
+A PPL query starts with either the `search` command, which references a table to search from, or the `describe` command, which references a table to get its metadata. All subsequent commands can appear in any order. In the following example, the `search` command references the accounts index as the source, and the `fields` and `where` commands perform the further processing.
+
+```text
+search source=accounts
+| where age > 18
+| fields firstname, lastname
+```
+
+## Required arguments
+
+Required arguments are shown in angle brackets < >.
+
+## Optional arguments
+
+Optional arguments are enclosed in square brackets [ ].
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/syntax.rst b/docs/user/ppl/cmd/syntax.rst
deleted file mode 100644
index c15aad68e15..00000000000
--- a/docs/user/ppl/cmd/syntax.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-======
-Syntax
-======
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-Command Order
-=============
-The PPL query starts with either the ``search`` command to reference a table to search from, or the ``describe`` command to reference a table to get its metadata. All the following command could be in any order. In the following example, ``search`` command refer the accounts index as the source, then using fields and where command to do the further processing.
-
-.. code-block::
-
-    search source=accounts
-    | where age > 18
-    | fields firstname, lastname
-
-
-Required arguments
-==================
-Required arguments are shown in angle brackets < >.
-
-
-Optional arguments
-==================
-Optional arguments are enclosed in square brackets [ ].
-
diff --git a/docs/user/ppl/cmd/table.md b/docs/user/ppl/cmd/table.md
new file mode 100644
index 00000000000..176752ebfba
--- /dev/null
+++ b/docs/user/ppl/cmd/table.md
@@ -0,0 +1,37 @@
+# table
+
+## Description
+
+The `table` command is an alias for the [`fields`](fields.md) command and provides the same field selection capabilities. It allows you to keep or remove fields from the search result using enhanced syntax options.
+
+## Syntax
+
+table [+\|-] <field-list>
+
+* [+\|-]: optional. If the plus (+) is used, only the fields specified in the field list will be kept. If the minus (-) is used, all the fields specified in the field list will be removed (see the sketch after this list). **Default:** +.
+* field-list: mandatory. Comma-delimited or space-delimited list of fields to keep or remove. Supports wildcard patterns.
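+
+For instance, a minimal sketch (illustrative, assuming the same `accounts` index used below; output not shown) that removes a single field with the minus form:
+
+```ppl
+source=accounts
+| table - email
+```
+
+## Example 1: Basic table command usage
+
+This example shows basic field selection using the table command.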
+
+```ppl
+source=accounts
+| table firstname lastname age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++-----------+----------+-----+
+| firstname | lastname | age |
+|-----------+----------+-----|
+| Amber     | Duke     | 32  |
+| Hattie    | Bond     | 36  |
+| Nanette   | Bates    | 28  |
+| Dale      | Adams    | 33  |
++-----------+----------+-----+
+```
+
+## See Also
+
+- [fields](fields.md) - Alias command with identical functionality
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/table.rst b/docs/user/ppl/cmd/table.rst
deleted file mode 100644
index 3512a648a1c..00000000000
--- a/docs/user/ppl/cmd/table.rst
+++ /dev/null
@@ -1,44 +0,0 @@
-=====
-table
-=====
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Description
-===========
-The ``table`` command is an alias for the `fields `_ command and provides the same field selection capabilities. It allows you to keep or remove fields from the search result using enhanced syntax options.
-
-Syntax
-======
-table [+|-]
-
-* [+|-]: optional. If the plus (+) is used, only the fields specified in the field list will be kept. If the minus (-) is used, all the fields specified in the field list will be removed. **Default:** +.
-* field-list: mandatory. Comma-delimited or space-delimited list of fields to keep or remove. Supports wildcard patterns.
-
-Example 1: Basic table command usage
-====================================
-
-This example shows basic field selection using the table command.
-
-PPL query::
-
-    os> source=accounts | table firstname lastname age;
-    fetched rows / total rows = 4/4
-    +-----------+----------+-----+
-    | firstname | lastname | age |
-    |-----------+----------+-----|
-    | Amber     | Duke     | 32  |
-    | Hattie    | Bond     | 36  |
-    | Nanette   | Bates    | 28  |
-    | Dale      | Adams    | 33  |
-    +-----------+----------+-----+
-
-
-See Also
-========
-- `fields `_ - Alias command with identical functionality
\ No newline at end of file
diff --git a/docs/user/ppl/cmd/timechart.md b/docs/user/ppl/cmd/timechart.md
new file mode 100644
index 00000000000..da3831c7aea
--- /dev/null
+++ b/docs/user/ppl/cmd/timechart.md
@@ -0,0 +1,375 @@
+# timechart
+
+## Description
+
+The `timechart` command creates a time-based aggregation of data. It groups data by time intervals and optionally by a field, then applies an aggregation function to each group. The results are returned in an unpivoted format with separate rows for each time-field combination.
+
+## Syntax
+
+timechart [timefield=<field>] [span=<interval>] [limit=<int>] [useother=<bool>] [usenull=<bool>] [nullstr=<string>] <aggregation_function> [by <field>]
+
+* timefield: optional. Specifies the timestamp field to use for time interval grouping. **Default:** `@timestamp`.
+* span: optional. Specifies the time interval for grouping data. **Default:** 1m (1 minute).
+  * Available time units:
+    * millisecond (ms)
+    * second (s)
+    * minute (m, case sensitive)
+    * hour (h)
+    * day (d)
+    * week (w)
+    * month (M, case sensitive)
+    * quarter (q)
+    * year (y)
+* limit: optional. Specifies the maximum number of distinct values to display when using the "by" clause. **Default:** 10.
+  * When there are more distinct values than the limit, the additional values are grouped into an "OTHER" category if useother is not set to false.
+  * The "most distinct" values are determined by calculating the sum of the aggregation values across all time intervals for each distinct field value. The top N values with the highest sums are displayed individually, while the rest are grouped into the "OTHER" category.
+ * Set to 0 to show all distinct values without any limit (when limit=0, useother is automatically set to false). + * The parameters can be specified in any order before the aggregation function. + * Only applies when using the "by" clause to group results. +* useother: optional. Controls whether to create an "OTHER" category for values beyond the limit. **Default:** true. + * When set to false, only the top N values (based on limit) are shown without an "OTHER" column. + * When set to true, values beyond the limit are grouped into an "OTHER" category. + * Only applies when using the "by" clause and when there are more distinct values than the limit. +* usenull: optional. Controls whether NULL values are placed into a separate category in the chart. **Default:** true. + * When set to true, NULL values are grouped into a separate category with the label specified by nullstr. + * When set to false, NULL values are excluded from the results. +* nullstr: optional. The display label used for NULL values when usenull is true. **Default:** "NULL". + * Specifies the string representation for the NULL category in the chart output. +* aggregation_function: mandatory. The aggregation function to apply to each time bucket. + * Currently, only a single aggregation function is supported. + * Available functions: All aggregation functions supported by the [stats](stats.md) command, as well as the timechart-specific aggregations listed below. +* by: optional. Groups the results by the specified field in addition to time intervals. If not specified, the aggregation is performed across all documents in each time interval. + +## PER_SECOND + +Usage: per_second(field) calculates the per-second rate for a numeric field within each time bucket. +The calculation formula is: `per_second(field) = sum(field) / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. +Return type: DOUBLE +## PER_MINUTE + +Usage: per_minute(field) calculates the per-minute rate for a numeric field within each time bucket. +The calculation formula is: `per_minute(field) = sum(field) * 60 / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. +Return type: DOUBLE +## PER_HOUR + +Usage: per_hour(field) calculates the per-hour rate for a numeric field within each time bucket. +The calculation formula is: `per_hour(field) = sum(field) * 3600 / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. +Return type: DOUBLE +## PER_DAY + +Usage: per_day(field) calculates the per-day rate for a numeric field within each time bucket. +The calculation formula is: `per_day(field) = sum(field) * 86400 / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. +Return type: DOUBLE +## Notes + +* The `timechart` command requires a timestamp field in the data. By default, it uses the `@timestamp` field, but you can specify a different field using the `timefield` parameter. +* Results are returned in an unpivoted format with separate rows for each time-field combination that has data. +* Only combinations with actual data are included in the results - empty combinations are omitted rather than showing null or zero values. +* The "top N" values for the `limit` parameter are selected based on the sum of values across all time intervals for each distinct field value. +* When using the `limit` parameter, values beyond the limit are grouped into an "OTHER" category (unless `useother=false`). 
+* Examples 6 and 7 use different datasets: Example 6 uses the `events` dataset with fewer hosts for simplicity, while Example 7 uses the `events_many_hosts` dataset with 11 distinct hosts. +* **Null values**: Documents with null values in the "by" field are treated as a separate category and appear as null in the results. + +## Example 1: Count events by hour + +This example counts events for each hour and groups them by host. + +```ppl +source=events +| timechart span=1h count() by host +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++---------------------+---------+---------+ +| @timestamp | host | count() | +|---------------------+---------+---------| +| 2023-01-01 10:00:00 | server1 | 4 | +| 2023-01-01 10:00:00 | server2 | 4 | ++---------------------+---------+---------+ +``` + +## Example 2: Count events by minute + +This example counts events for each minute and groups them by host. + +```ppl +source=events +| timechart span=1m count() by host +``` + +Expected output: + +```text +fetched rows / total rows = 8/8 ++---------------------+---------+---------+ +| @timestamp | host | count() | +|---------------------+---------+---------| +| 2023-01-01 10:00:00 | server1 | 1 | +| 2023-01-01 10:05:00 | server2 | 1 | +| 2023-01-01 10:10:00 | server1 | 1 | +| 2023-01-01 10:15:00 | server2 | 1 | +| 2023-01-01 10:20:00 | server1 | 1 | +| 2023-01-01 10:25:00 | server2 | 1 | +| 2023-01-01 10:30:00 | server1 | 1 | +| 2023-01-01 10:35:00 | server2 | 1 | ++---------------------+---------+---------+ +``` + +## Example 3: Calculate average number of packets by minute + +This example calculates the average packets for each minute without grouping by any field. + +```ppl +source=events +| timechart span=1m avg(packets) +``` + +Expected output: + +```text +fetched rows / total rows = 8/8 ++---------------------+--------------+ +| @timestamp | avg(packets) | +|---------------------+--------------| +| 2023-01-01 10:00:00 | 60.0 | +| 2023-01-01 10:05:00 | 30.0 | +| 2023-01-01 10:10:00 | 60.0 | +| 2023-01-01 10:15:00 | 30.0 | +| 2023-01-01 10:20:00 | 60.0 | +| 2023-01-01 10:25:00 | 30.0 | +| 2023-01-01 10:30:00 | 180.0 | +| 2023-01-01 10:35:00 | 90.0 | ++---------------------+--------------+ +``` + +## Example 4: Calculate average number of packets by every 20 minutes and status + +This example calculates the average number of packets for every 20 minutes and groups them by status. 
+
+```ppl
+source=events
+| timechart span=20m avg(packets) by status
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 8/8
++---------------------+------------+--------------+
+| @timestamp          | status     | avg(packets) |
+|---------------------+------------+--------------|
+| 2023-01-01 10:00:00 | active     | 30.0         |
+| 2023-01-01 10:00:00 | inactive   | 30.0         |
+| 2023-01-01 10:00:00 | pending    | 60.0         |
+| 2023-01-01 10:00:00 | processing | 60.0         |
+| 2023-01-01 10:20:00 | cancelled  | 180.0        |
+| 2023-01-01 10:20:00 | completed  | 60.0         |
+| 2023-01-01 10:20:00 | inactive   | 90.0         |
+| 2023-01-01 10:20:00 | pending    | 30.0         |
++---------------------+------------+--------------+
+```
+
+## Example 5: Count events by hour and category
+
+This example counts events for each hour and groups them by category.
+
+```ppl
+source=events
+| timechart span=1h count() by category
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++---------------------+----------+---------+
+| @timestamp          | category | count() |
+|---------------------+----------+---------|
+| 2023-01-01 10:00:00 | orders   | 4       |
+| 2023-01-01 10:00:00 | users    | 4       |
++---------------------+----------+---------+
+```
+
+## Example 6: Using the limit parameter with count() function
+
+When there are many distinct values in the "by" field, the timechart command will display the top values based on the limit parameter and group the rest into an "OTHER" category.
+This query will display the top 2 hosts with the highest count values, and group the remaining hosts into an "OTHER" category.
+
+```ppl
+source=events
+| timechart span=1m limit=2 count() by host
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 8/8
++---------------------+---------+---------+
+| @timestamp          | host    | count() |
+|---------------------+---------+---------|
+| 2023-01-01 10:00:00 | server1 | 1       |
+| 2023-01-01 10:05:00 | server2 | 1       |
+| 2023-01-01 10:10:00 | server1 | 1       |
+| 2023-01-01 10:15:00 | server2 | 1       |
+| 2023-01-01 10:20:00 | server1 | 1       |
+| 2023-01-01 10:25:00 | server2 | 1       |
+| 2023-01-01 10:30:00 | server1 | 1       |
+| 2023-01-01 10:35:00 | server2 | 1       |
++---------------------+---------+---------+
+```
+
+## Example 7: Using limit=0 with count() to show all values
+
+To display all distinct values without any limit, set limit=0:
+
+```ppl
+source=events_many_hosts
+| timechart span=1h limit=0 count() by host
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 11/11
++---------------------+--------+---------+
+| @timestamp          | host   | count() |
+|---------------------+--------+---------|
+| 2024-07-01 00:00:00 | web-01 | 1       |
+| 2024-07-01 00:00:00 | web-02 | 1       |
+| 2024-07-01 00:00:00 | web-03 | 1       |
+| 2024-07-01 00:00:00 | web-04 | 1       |
+| 2024-07-01 00:00:00 | web-05 | 1       |
+| 2024-07-01 00:00:00 | web-06 | 1       |
+| 2024-07-01 00:00:00 | web-07 | 1       |
+| 2024-07-01 00:00:00 | web-08 | 1       |
+| 2024-07-01 00:00:00 | web-09 | 1       |
+| 2024-07-01 00:00:00 | web-10 | 1       |
+| 2024-07-01 00:00:00 | web-11 | 1       |
++---------------------+--------+---------+
+```
+
+This shows all 11 hosts as separate rows without an "OTHER" category.
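+
+Between these two extremes, `limit` can be raised while keeping the default `useother=true`. The following is a minimal sketch (illustrative, not output-verified), reusing the `events_many_hosts` dataset assumed above; per the semantics described earlier, it would keep the five hosts with the highest counts as separate rows and fold the remaining six into "OTHER":
+
+```ppl
+source=events_many_hosts
+| timechart span=1h limit=5 count() by host
+```
+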
+## Example 8: Using useother=false with count() function + +Limit to top 10 hosts without OTHER category (useother=false): + +```ppl +source=events_many_hosts +| timechart span=1h useother=false count() by host +``` + +Expected output: + +```text +fetched rows / total rows = 10/10 ++---------------------+--------+---------+ +| @timestamp | host | count() | +|---------------------+--------+---------| +| 2024-07-01 00:00:00 | web-01 | 1 | +| 2024-07-01 00:00:00 | web-02 | 1 | +| 2024-07-01 00:00:00 | web-03 | 1 | +| 2024-07-01 00:00:00 | web-04 | 1 | +| 2024-07-01 00:00:00 | web-05 | 1 | +| 2024-07-01 00:00:00 | web-06 | 1 | +| 2024-07-01 00:00:00 | web-07 | 1 | +| 2024-07-01 00:00:00 | web-08 | 1 | +| 2024-07-01 00:00:00 | web-09 | 1 | +| 2024-07-01 00:00:00 | web-10 | 1 | ++---------------------+--------+---------+ +``` + +## Example 9: Using limit with useother parameter and avg() function + +Limit to top 3 hosts with OTHER category (default useother=true): + +```ppl +source=events_many_hosts +| timechart span=1h limit=3 avg(cpu_usage) by host +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------------------+--------+----------------+ +| @timestamp | host | avg(cpu_usage) | +|---------------------+--------+----------------| +| 2024-07-01 00:00:00 | OTHER | 41.3 | +| 2024-07-01 00:00:00 | web-03 | 55.3 | +| 2024-07-01 00:00:00 | web-07 | 48.6 | +| 2024-07-01 00:00:00 | web-09 | 67.8 | ++---------------------+--------+----------------+ +``` + +Limit to top 3 hosts without OTHER category (useother=false): + +```ppl +source=events_many_hosts +| timechart span=1h limit=3 useother=false avg(cpu_usage) by host +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++---------------------+--------+----------------+ +| @timestamp | host | avg(cpu_usage) | +|---------------------+--------+----------------| +| 2024-07-01 00:00:00 | web-03 | 55.3 | +| 2024-07-01 00:00:00 | web-07 | 48.6 | +| 2024-07-01 00:00:00 | web-09 | 67.8 | ++---------------------+--------+----------------+ +``` + +## Example 10: Handling null values in the "by" field + +This example shows how null values in the "by" field are treated as a separate category. The dataset events_null has 1 entry that does not have a host field. +It is put into a separate "NULL" category because the defaults for `usenull` and `nullstr` are `true` and `"NULL"` respectively. + +```ppl +source=events_null +| timechart span=1h count() by host +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------------------+--------+---------+ +| @timestamp | host | count() | +|---------------------+--------+---------| +| 2024-07-01 00:00:00 | NULL | 1 | +| 2024-07-01 00:00:00 | db-01 | 1 | +| 2024-07-01 00:00:00 | web-01 | 2 | +| 2024-07-01 00:00:00 | web-02 | 2 | ++---------------------+--------+---------+ +``` + +## Example 11: Calculate packets per second rate + +This example calculates the per-second packet rate for network traffic data using the per_second() function. 
+ +```ppl +source=events +| timechart span=30m per_second(packets) by host +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------------------+---------+---------------------+ +| @timestamp | host | per_second(packets) | +|---------------------+---------+---------------------| +| 2023-01-01 10:00:00 | server1 | 0.1 | +| 2023-01-01 10:00:00 | server2 | 0.05 | +| 2023-01-01 10:30:00 | server1 | 0.1 | +| 2023-01-01 10:30:00 | server2 | 0.05 | ++---------------------+---------+---------------------+ +``` + +## Limitations + +* Only a single aggregation function is supported per timechart command. +* The `bins` parameter and other bin options are not supported since the `bin` command is not implemented yet. Use the `span` parameter to control time intervals. \ No newline at end of file diff --git a/docs/user/ppl/cmd/timechart.rst b/docs/user/ppl/cmd/timechart.rst deleted file mode 100644 index 21ac980d46a..00000000000 --- a/docs/user/ppl/cmd/timechart.rst +++ /dev/null @@ -1,351 +0,0 @@ -========= -timechart -========= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``timechart`` command creates a time-based aggregation of data. It groups data by time intervals and optionally by a field, then applies an aggregation function to each group. The results are returned in an unpivoted format with separate rows for each time-field combination. - -Syntax -====== - -timechart [timefield=] [span=] [limit=] [useother=] [by ] - -* timefield: optional. Specifies the timestamp field to use for time interval grouping. **Default**: ``@timestamp``. - -* span: optional. Specifies the time interval for grouping data. **Default:** 1m (1 minute). - - * Available time units: - - * millisecond (ms) - * second (s) - * minute (m, case sensitive) - * hour (h) - * day (d) - * week (w) - * month (M, case sensitive) - * quarter (q) - * year (y) - -* limit: optional. Specifies the maximum number of distinct values to display when using the "by" clause. **Default:** 10. - - * When there are more distinct values than the limit, the additional values are grouped into an "OTHER" category if useother is not set to false. - * The "most distinct" values are determined by calculating the sum of the aggregation values across all time intervals for each distinct field value. The top N values with the highest sums are displayed individually, while the rest are grouped into the "OTHER" category. - * Set to 0 to show all distinct values without any limit (when limit=0, useother is automatically set to false). - * The parameters can be specified in any order before the aggregation function. - * Only applies when using the "by" clause to group results. - -* useother: optional. Controls whether to create an "OTHER" category for values beyond the limit. **Default:** true. - - * When set to false, only the top N values (based on limit) are shown without an "OTHER" column. - * When set to true, values beyond the limit are grouped into an "OTHER" category. - * Only applies when using the "by" clause and when there are more distinct values than the limit. - -* aggregation_function: mandatory. The aggregation function to apply to each time bucket. - - * Currently, only a single aggregation function is supported. - * Available functions: All aggregation functions supported by the :doc:`stats ` command, as well as the timechart-specific aggregations listed below. - -* by: optional. Groups the results by the specified field in addition to time intervals. 
If not specified, the aggregation is performed across all documents in each time interval. - -PER_SECOND ----------- - -Usage: per_second(field) calculates the per-second rate for a numeric field within each time bucket. - -The calculation formula is: `per_second(field) = sum(field) / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. - -Return type: DOUBLE - -PER_MINUTE ----------- - -Usage: per_minute(field) calculates the per-minute rate for a numeric field within each time bucket. - -The calculation formula is: `per_minute(field) = sum(field) * 60 / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. - -Return type: DOUBLE - -PER_HOUR --------- - -Usage: per_hour(field) calculates the per-hour rate for a numeric field within each time bucket. - -The calculation formula is: `per_hour(field) = sum(field) * 3600 / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. - -Return type: DOUBLE - -PER_DAY -------- - -Usage: per_day(field) calculates the per-day rate for a numeric field within each time bucket. - -The calculation formula is: `per_day(field) = sum(field) * 86400 / span_in_seconds`, where `span_in_seconds` is the span interval in seconds. - -Return type: DOUBLE - -Notes -===== - -* The ``timechart`` command requires a timestamp field in the data. By default, it uses the ``@timestamp`` field, but you can specify a different field using the ``timefield`` parameter. -* Results are returned in an unpivoted format with separate rows for each time-field combination that has data. -* Only combinations with actual data are included in the results - empty combinations are omitted rather than showing null or zero values. -* The "top N" values for the ``limit`` parameter are selected based on the sum of values across all time intervals for each distinct field value. -* When using the ``limit`` parameter, values beyond the limit are grouped into an "OTHER" category (unless ``useother=false``). -* Examples 6 and 7 use different datasets: Example 6 uses the ``events`` dataset with fewer hosts for simplicity, while Example 7 uses the ``events_many_hosts`` dataset with 11 distinct hosts. - -* **Null values**: Documents with null values in the "by" field are treated as a separate category and appear as null in the results. - -Example 1: Count events by hour -=============================== - -This example counts events for each hour and groups them by host. - -PPL query:: - - os> source=events | timechart span=1h count() by host - fetched rows / total rows = 2/2 - +---------------------+---------+---------+ - | @timestamp | host | count() | - |---------------------+---------+---------| - | 2023-01-01 10:00:00 | server1 | 4 | - | 2023-01-01 10:00:00 | server2 | 4 | - +---------------------+---------+---------+ - -Example 2: Count events by minute -========================================================== - -This example counts events for each minute and groups them by host. 
- -PPL query:: - - os> source=events | timechart span=1m count() by host - fetched rows / total rows = 8/8 - +---------------------+---------+---------+ - | @timestamp | host | count() | - |---------------------+---------+---------| - | 2023-01-01 10:00:00 | server1 | 1 | - | 2023-01-01 10:05:00 | server2 | 1 | - | 2023-01-01 10:10:00 | server1 | 1 | - | 2023-01-01 10:15:00 | server2 | 1 | - | 2023-01-01 10:20:00 | server1 | 1 | - | 2023-01-01 10:25:00 | server2 | 1 | - | 2023-01-01 10:30:00 | server1 | 1 | - | 2023-01-01 10:35:00 | server2 | 1 | - +---------------------+---------+---------+ - -Example 3: Calculate average number of packets by minute -================================================ - -This example calculates the average packets for each minute without grouping by any field. - -PPL query:: - - os> source=events | timechart span=1m avg(packets) - fetched rows / total rows = 8/8 - +---------------------+--------------+ - | @timestamp | avg(packets) | - |---------------------+--------------| - | 2023-01-01 10:00:00 | 60.0 | - | 2023-01-01 10:05:00 | 30.0 | - | 2023-01-01 10:10:00 | 60.0 | - | 2023-01-01 10:15:00 | 30.0 | - | 2023-01-01 10:20:00 | 60.0 | - | 2023-01-01 10:25:00 | 30.0 | - | 2023-01-01 10:30:00 | 180.0 | - | 2023-01-01 10:35:00 | 90.0 | - +---------------------+--------------+ - -Example 4: Calculate average number of packets by every 20 minutes and status -=========================================================== - -This example calculates the average number of packets for every 20 minutes and groups them by status. - -PPL query:: - - os> source=events | timechart span=20m avg(packets) by status - fetched rows / total rows = 8/8 - +---------------------+------------+--------------+ - | @timestamp | status | avg(packets) | - |---------------------+------------+--------------| - | 2023-01-01 10:00:00 | active | 30.0 | - | 2023-01-01 10:00:00 | inactive | 30.0 | - | 2023-01-01 10:00:00 | pending | 60.0 | - | 2023-01-01 10:00:00 | processing | 60.0 | - | 2023-01-01 10:20:00 | cancelled | 180.0 | - | 2023-01-01 10:20:00 | completed | 60.0 | - | 2023-01-01 10:20:00 | inactive | 90.0 | - | 2023-01-01 10:20:00 | pending | 30.0 | - +---------------------+------------+--------------+ - -Example 5: Count events by hour and category -===================================================================== - -This example counts events for each second and groups them by category - -PPL query:: - - os> source=events | timechart span=1h count() by category - fetched rows / total rows = 2/2 - +---------------------+----------+---------+ - | @timestamp | category | count() | - |---------------------+----------+---------| - | 2023-01-01 10:00:00 | orders | 4 | - | 2023-01-01 10:00:00 | users | 4 | - +---------------------+----------+---------+ - -Example 6: Using the limit parameter with count() function -========================================================== - -When there are many distinct values in the "by" field, the timechart command will display the top values based on the limit parameter and group the rest into an "OTHER" category. -This query will display the top 2 hosts with the highest count values, and group the remaining hosts into an "OTHER" category. 
- -PPL query:: - - os> source=events | timechart span=1m limit=2 count() by host - fetched rows / total rows = 8/8 - +---------------------+---------+---------+ - | @timestamp | host | count() | - |---------------------+---------+---------| - | 2023-01-01 10:00:00 | server1 | 1 | - | 2023-01-01 10:05:00 | server2 | 1 | - | 2023-01-01 10:10:00 | server1 | 1 | - | 2023-01-01 10:15:00 | server2 | 1 | - | 2023-01-01 10:20:00 | server1 | 1 | - | 2023-01-01 10:25:00 | server2 | 1 | - | 2023-01-01 10:30:00 | server1 | 1 | - | 2023-01-01 10:35:00 | server2 | 1 | - +---------------------+---------+---------+ - -Example 7: Using limit=0 with count() to show all values -======================================================== - -To display all distinct values without any limit, set limit=0: - -PPL query:: - - os> source=events_many_hosts | timechart span=1h limit=0 count() by host - fetched rows / total rows = 11/11 - +---------------------+--------+---------+ - | @timestamp | host | count() | - |---------------------+--------+---------| - | 2024-07-01 00:00:00 | web-01 | 1 | - | 2024-07-01 00:00:00 | web-02 | 1 | - | 2024-07-01 00:00:00 | web-03 | 1 | - | 2024-07-01 00:00:00 | web-04 | 1 | - | 2024-07-01 00:00:00 | web-05 | 1 | - | 2024-07-01 00:00:00 | web-06 | 1 | - | 2024-07-01 00:00:00 | web-07 | 1 | - | 2024-07-01 00:00:00 | web-08 | 1 | - | 2024-07-01 00:00:00 | web-09 | 1 | - | 2024-07-01 00:00:00 | web-10 | 1 | - | 2024-07-01 00:00:00 | web-11 | 1 | - +---------------------+--------+---------+ - -This shows all 11 hosts as separate rows without an "OTHER" category. - -Example 8: Using useother=false with count() function -===================================================== - -Limit to top 10 hosts without OTHER category (useother=false): - -PPL query:: - - os> source=events_many_hosts | timechart span=1h useother=false count() by host - fetched rows / total rows = 10/10 - +---------------------+--------+---------+ - | @timestamp | host | count() | - |---------------------+--------+---------| - | 2024-07-01 00:00:00 | web-01 | 1 | - | 2024-07-01 00:00:00 | web-02 | 1 | - | 2024-07-01 00:00:00 | web-03 | 1 | - | 2024-07-01 00:00:00 | web-04 | 1 | - | 2024-07-01 00:00:00 | web-05 | 1 | - | 2024-07-01 00:00:00 | web-06 | 1 | - | 2024-07-01 00:00:00 | web-07 | 1 | - | 2024-07-01 00:00:00 | web-08 | 1 | - | 2024-07-01 00:00:00 | web-09 | 1 | - | 2024-07-01 00:00:00 | web-10 | 1 | - +---------------------+--------+---------+ - -Example 9: Using limit with useother parameter and avg() function -================================================================= - -Limit to top 3 hosts with OTHER category (default useother=true): - -PPL query:: - - os> source=events_many_hosts | timechart span=1h limit=3 avg(cpu_usage) by host - fetched rows / total rows = 4/4 - +---------------------+--------+----------------+ - | @timestamp | host | avg(cpu_usage) | - |---------------------+--------+----------------| - | 2024-07-01 00:00:00 | OTHER | 41.3 | - | 2024-07-01 00:00:00 | web-03 | 55.3 | - | 2024-07-01 00:00:00 | web-07 | 48.6 | - | 2024-07-01 00:00:00 | web-09 | 67.8 | - +---------------------+--------+----------------+ - -Limit to top 3 hosts without OTHER category (useother=false): - -PPL query:: - - os> source=events_many_hosts | timechart span=1h limit=3 useother=false avg(cpu_usage) by host - fetched rows / total rows = 3/3 - +---------------------+--------+----------------+ - | @timestamp | host | avg(cpu_usage) | - |---------------------+--------+----------------| - | 2024-07-01 00:00:00 | web-03 
| 55.3           |
-    | 2024-07-01 00:00:00 | web-07 | 48.6           |
-    | 2024-07-01 00:00:00 | web-09 | 67.8           |
-    +---------------------+--------+----------------+
-
-Example 10: Handling null values in the "by" field
-==================================================
-
-This example shows how null values in the "by" field are treated as a separate category. The dataset events_null has 1 entry that does not have a host field.
-It is put into a separate "NULL" category because the defaults for ``usenull`` and ``nullstr`` are ``true`` and ``"NULL"`` respectively.
-
-PPL query::
-
-    os> source=events_null | timechart span=1h count() by host
-    fetched rows / total rows = 4/4
-    +---------------------+--------+---------+
-    | @timestamp          | host   | count() |
-    |---------------------+--------+---------|
-    | 2024-07-01 00:00:00 | NULL   | 1       |
-    | 2024-07-01 00:00:00 | db-01  | 1       |
-    | 2024-07-01 00:00:00 | web-01 | 2       |
-    | 2024-07-01 00:00:00 | web-02 | 2       |
-    +---------------------+--------+---------+
-
-Example 11: Calculate packets per second rate
-=============================================
-
-This example calculates the per-second packet rate for network traffic data using the per_second() function.
-
-PPL query::
-
-    os> source=events | timechart span=30m per_second(packets) by host
-    fetched rows / total rows = 4/4
-    +---------------------+---------+---------------------+
-    | @timestamp          | host    | per_second(packets) |
-    |---------------------+---------+---------------------|
-    | 2023-01-01 10:00:00 | server1 | 0.1                 |
-    | 2023-01-01 10:00:00 | server2 | 0.05                |
-    | 2023-01-01 10:30:00 | server1 | 0.1                 |
-    | 2023-01-01 10:30:00 | server2 | 0.05                |
-    +---------------------+---------+---------------------+
-
-Limitations
-===========
-* Only a single aggregation function is supported per timechart command.
-* The ``bins`` parameter and other bin options are not supported since the ``bin`` command is not implemented yet. Use the ``span`` parameter to control time intervals.
-
diff --git a/docs/user/ppl/cmd/top.md b/docs/user/ppl/cmd/top.md
new file mode 100644
index 00000000000..fa644f2a117
--- /dev/null
+++ b/docs/user/ppl/cmd/top.md
@@ -0,0 +1,164 @@
+# top
+
+## Description
+
+The `top` command finds the most common tuple of values of all fields in the field list.
+
+## Syntax
+
+top [N] [top-options] <field-list> [by-clause]
+
+* N: optional. Number of results to return. **Default:** 10.
+* top-options: optional. Options for the top command. Supported syntax is [countfield=<string>] [showcount=<bool>] [usenull=<bool>] (see the sketch after this list).
+  * showcount=<bool>: optional. Whether to create a field in the output that represents a count of the tuple of values. **Default:** true.
+  * countfield=<string>: optional. The name of the field that contains the count. **Default:** 'count'.
+  * usenull=<bool>: optional (since 3.4.0). Whether to output the null value. **Default:** Determined by `plugins.ppl.syntax.legacy.preferred`.
+    * When `plugins.ppl.syntax.legacy.preferred=true`, `usenull` defaults to `true`
+    * When `plugins.ppl.syntax.legacy.preferred=false`, `usenull` defaults to `false`
+* field-list: mandatory. Comma-delimited list of field names.
+* by-clause: optional. One or more fields to group the results by.
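+
+For instance, a minimal sketch (illustrative; the option values are arbitrary and the output is not shown) combining N with two top-options:
+
+```ppl
+source=accounts
+| top 1 countfield='freq' usenull=false gender
+```
+
+## Example 1: Find the most common values in a field
+
+This example finds the most common gender of all the accounts.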
+ +```ppl +source=accounts +| top showcount=false gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------+ +| gender | +|--------| +| M | +| F | ++--------+ +``` + +## Example 2: Limit results to top N values + +This example finds the most common gender and limits results to 1 value. + +```ppl +source=accounts +| top 1 showcount=false gender +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| gender | +|--------| +| M | ++--------+ +``` + +## Example 3: Find the most common values grouped by field + +This example finds the most common age of all the accounts grouped by gender. + +```ppl +source=accounts +| top 1 showcount=false age by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------+-----+ +| gender | age | +|--------+-----| +| F | 28 | +| M | 32 | ++--------+-----+ +``` + +## Example 4: Top command with count field + +This example finds the most common gender of all the accounts and includes the count. + +```ppl +source=accounts +| top gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------+-------+ +| gender | count | +|--------+-------| +| M | 3 | +| F | 1 | ++--------+-------+ +``` + +## Example 5: Specify the count field option + +This example specifies a custom name for the count field. + +```ppl +source=accounts +| top countfield='cnt' gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------+-----+ +| gender | cnt | +|--------+-----| +| M | 3 | +| F | 1 | ++--------+-----+ +``` + +## Example 5: Specify the usenull field option + +```ppl +source=accounts +| top usenull=false email +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++-----------------------+-------+ +| email | count | +|-----------------------+-------| +| amberduke@pyrami.com | 1 | +| daleadams@boink.com | 1 | +| hattiebond@netagy.com | 1 | ++-----------------------+-------+ +``` + +```ppl +source=accounts +| top usenull=true email +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------------------+-------+ +| email | count | +|-----------------------+-------| +| null | 1 | +| amberduke@pyrami.com | 1 | +| daleadams@boink.com | 1 | +| hattiebond@netagy.com | 1 | ++-----------------------+-------+ +``` + +## Limitations + +The `top` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. \ No newline at end of file diff --git a/docs/user/ppl/cmd/top.rst b/docs/user/ppl/cmd/top.rst deleted file mode 100644 index bdf22addf40..00000000000 --- a/docs/user/ppl/cmd/top.rst +++ /dev/null @@ -1,145 +0,0 @@ -=== -top -=== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``top`` command finds the most common tuple of values of all fields in the field list. - -Syntax -====== -top [N] [top-options] [by-clause] - -* N: optional. number of results to return. **Default**: 10 -* top-options: optional. options for the top command. Supported syntax is [countfield=] [showcount=]. - - * showcount=: optional. whether to create a field in output that represent a count of the tuple of values. **Default:** true. - * countfield=: optional. the name of the field that contains count. **Default:** 'count'. - * usenull=: optional (since 3.4.0). whether to output the null value. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. 
- - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false`` - -* field-list: mandatory. comma-delimited list of field names. -* by-clause: optional. one or more fields to group the results by. - -Example 1: Find the most common values in a field -================================================= - -This example finds the most common gender of all the accounts. - -PPL query:: - - os> source=accounts | top showcount=false gender; - fetched rows / total rows = 2/2 - +--------+ - | gender | - |--------| - | M | - | F | - +--------+ - -Example 2: Limit results to top N values -======================================== - -This example finds the most common gender and limits results to 1 value. - -PPL query:: - - os> source=accounts | top 1 showcount=false gender; - fetched rows / total rows = 1/1 - +--------+ - | gender | - |--------| - | M | - +--------+ - -Example 3: Find the most common values grouped by field -======================================================= - -This example finds the most common age of all the accounts grouped by gender. - -PPL query:: - - os> source=accounts | top 1 showcount=false age by gender; - fetched rows / total rows = 2/2 - +--------+-----+ - | gender | age | - |--------+-----| - | F | 28 | - | M | 32 | - +--------+-----+ - -Example 4: Top command with count field -======================================= - -This example finds the most common gender of all the accounts and includes the count. - -PPL query:: - - os> source=accounts | top gender; - fetched rows / total rows = 2/2 - +--------+-------+ - | gender | count | - |--------+-------| - | M | 3 | - | F | 1 | - +--------+-------+ - - -Example 5: Specify the count field option -========================================= - -This example specifies a custom name for the count field. - -PPL query:: - - os> source=accounts | top countfield='cnt' gender; - fetched rows / total rows = 2/2 - +--------+-----+ - | gender | cnt | - |--------+-----| - | M | 3 | - | F | 1 | - +--------+-----+ - - -Example 5: Specify the usenull field option -=========================================== - -PPL query:: - - os> source=accounts | top usenull=false email; - fetched rows / total rows = 3/3 - +-----------------------+-------+ - | email | count | - |-----------------------+-------| - | amberduke@pyrami.com | 1 | - | daleadams@boink.com | 1 | - | hattiebond@netagy.com | 1 | - +-----------------------+-------+ - -PPL query:: - - os> source=accounts | top usenull=true email; - fetched rows / total rows = 4/4 - +-----------------------+-------+ - | email | count | - |-----------------------+-------| - | null | 1 | - | amberduke@pyrami.com | 1 | - | daleadams@boink.com | 1 | - | hattiebond@netagy.com | 1 | - +-----------------------+-------+ - - -Limitations -=========== -The ``top`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. diff --git a/docs/user/ppl/cmd/trendline.md b/docs/user/ppl/cmd/trendline.md new file mode 100644 index 00000000000..8fee5d3c399 --- /dev/null +++ b/docs/user/ppl/cmd/trendline.md @@ -0,0 +1,114 @@ +# trendline + +## Description + +The `trendline` command calculates moving averages of fields. +## Syntax + +trendline [sort <[+\|-] sort-field>] [sma\|wma](number-of-datapoints, field) [as \] [[sma\|wma](number-of-datapoints, field) [as \]]... +* [+\|-]: optional. 
The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. +* sort-field: mandatory when sorting is used. The field used to sort. +* sma\|wma: mandatory. Simple Moving Average (sma) applies equal weighting to all values, Weighted Moving Average (wma) applies greater weight to more recent values. +* number-of-datapoints: mandatory. The number of datapoints to calculate the moving average (must be greater than zero). +* field: mandatory. The name of the field the moving average should be calculated for. +* alias: optional. The name of the resulting column containing the moving average. **Default:** field name with "_trendline". + +## Example 1: Calculate the simple moving average on one field. + +This example shows how to calculate the simple moving average on one field. + +```ppl +source=accounts +| trendline sma(2, account_number) as an +| fields an +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++------+ +| an | +|------| +| null | +| 3.5 | +| 9.5 | +| 15.5 | ++------+ +``` + +## Example 2: Calculate the simple moving average on multiple fields. + +This example shows how to calculate the simple moving average on multiple fields. + +```ppl +source=accounts +| trendline sma(2, account_number) as an sma(2, age) as age_trend +| fields an, age_trend +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++------+-----------+ +| an | age_trend | +|------+-----------| +| null | null | +| 3.5 | 34.0 | +| 9.5 | 32.0 | +| 15.5 | 30.5 | ++------+-----------+ +``` + +## Example 3: Calculate the simple moving average on one field without specifying an alias. + +This example shows how to calculate the simple moving average on one field. + +```ppl +source=accounts +| trendline sma(2, account_number) +| fields account_number_trendline +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------------------------+ +| account_number_trendline | +|--------------------------| +| null | +| 3.5 | +| 9.5 | +| 15.5 | ++--------------------------+ +``` + +## Example 4: Calculate the weighted moving average on one field. + +This example shows how to calculate the weighted moving average on one field. + +```ppl +source=accounts +| trendline wma(2, account_number) +| fields account_number_trendline +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------------------------+ +| account_number_trendline | +|--------------------------| +| null | +| 4.333333333333333 | +| 10.666666666666666 | +| 16.333333333333332 | ++--------------------------+ +``` + +## Limitations + +The `trendline` command requires all values in the specified `field` to be non-null. Any rows with null values present in the calculation field will be automatically excluded from the command's output. \ No newline at end of file diff --git a/docs/user/ppl/cmd/trendline.rst b/docs/user/ppl/cmd/trendline.rst deleted file mode 100644 index e2fd067d262..00000000000 --- a/docs/user/ppl/cmd/trendline.rst +++ /dev/null @@ -1,103 +0,0 @@ -========= -trendline -========= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``trendline`` command calculates moving averages of fields. - -Syntax -====== -trendline [sort <[+|-] sort-field>] [sma|wma](number-of-datapoints, field) [as ] [[sma|wma](number-of-datapoints, field) [as ]]... - -* [+|-]: optional. 
The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. -* sort-field: mandatory when sorting is used. The field used to sort. -* sma|wma: mandatory. Simple Moving Average (sma) applies equal weighting to all values, Weighted Moving Average (wma) applies greater weight to more recent values. -* number-of-datapoints: mandatory. The number of datapoints to calculate the moving average (must be greater than zero). -* field: mandatory. The name of the field the moving average should be calculated for. -* alias: optional. The name of the resulting column containing the moving average. **Default:** field name with "_trendline". - - -Example 1: Calculate the simple moving average on one field. -============================================================ - -This example shows how to calculate the simple moving average on one field. - -PPL query:: - - os> source=accounts | trendline sma(2, account_number) as an | fields an; - fetched rows / total rows = 4/4 - +------+ - | an | - |------| - | null | - | 3.5 | - | 9.5 | - | 15.5 | - +------+ - - -Example 2: Calculate the simple moving average on multiple fields. -================================================================== - -This example shows how to calculate the simple moving average on multiple fields. - -PPL query:: - - os> source=accounts | trendline sma(2, account_number) as an sma(2, age) as age_trend | fields an, age_trend ; - fetched rows / total rows = 4/4 - +------+-----------+ - | an | age_trend | - |------+-----------| - | null | null | - | 3.5 | 34.0 | - | 9.5 | 32.0 | - | 15.5 | 30.5 | - +------+-----------+ - -Example 3: Calculate the simple moving average on one field without specifying an alias. -======================================================================================== - -This example shows how to calculate the simple moving average on one field. - -PPL query:: - - os> source=accounts | trendline sma(2, account_number) | fields account_number_trendline; - fetched rows / total rows = 4/4 - +--------------------------+ - | account_number_trendline | - |--------------------------| - | null | - | 3.5 | - | 9.5 | - | 15.5 | - +--------------------------+ - -Example 4: Calculate the weighted moving average on one field. -============================================================== - -This example shows how to calculate the weighted moving average on one field. - -PPL query:: - - PPL> source=accounts | trendline wma(2, account_number) | fields account_number_trendline; - fetched rows / total rows = 4/4 - +--------------------------+ - | account_number_trendline | - |--------------------------| - | null | - | 4.333333333333333 | - | 10.666666666666666 | - | 16.333333333333332 | - +--------------------------+ - -Limitations -=========== -The ``trendline`` command requires all values in the specified ``field`` to be non-null. Any rows with null values present in the calculation field will be automatically excluded from the command's output. \ No newline at end of file diff --git a/docs/user/ppl/cmd/where.md b/docs/user/ppl/cmd/where.md new file mode 100644 index 00000000000..9a96d9b7d47 --- /dev/null +++ b/docs/user/ppl/cmd/where.md @@ -0,0 +1,207 @@ +# where + +## Description + +The `where` command filters the search result. The `where` command only returns the result when the bool-expression evaluates to true. +## Syntax + +where \ +* bool-expression: optional. 
Any expression which could be evaluated to boolean value. + +## Example 1: Filter result set with condition + +This example shows fetching all the documents from the accounts index where account_number is 1 or gender is "F". + +```ppl +source=accounts +| where account_number=1 or gender="F" +| fields account_number, gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------+--------+ +| account_number | gender | +|----------------+--------| +| 1 | M | +| 13 | F | ++----------------+--------+ +``` + +## Example 2: Basic Field Comparison + +The example shows how to filter accounts with balance greater than 30000. + +```ppl +source=accounts +| where balance > 30000 +| fields account_number, balance +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------+---------+ +| account_number | balance | +|----------------+---------| +| 1 | 39225 | +| 13 | 32838 | ++----------------+---------+ +``` + +## Example 3: Pattern Matching with LIKE + +Pattern Matching with Underscore (_) +The example demonstrates using LIKE with underscore (_) to match a single character. + +```ppl +source=accounts +| where LIKE(state, 'M_') +| fields account_number, state +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+-------+ +| account_number | state | +|----------------+-------| +| 18 | MD | ++----------------+-------+ +``` + +Pattern Matching with Percent (%) +The example demonstrates using LIKE with percent (%) to match multiple characters. + +```ppl +source=accounts +| where LIKE(state, 'V%') +| fields account_number, state +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+-------+ +| account_number | state | +|----------------+-------| +| 13 | VA | ++----------------+-------+ +``` + +## Example 4: Multiple Conditions + +The example shows how to combine multiple conditions using AND operator. + +```ppl +source=accounts +| where age > 30 AND gender = 'M' +| fields account_number, age, gender +``` + +Expected output: + +```text +fetched rows / total rows = 3/3 ++----------------+-----+--------+ +| account_number | age | gender | +|----------------+-----+--------| +| 1 | 32 | M | +| 6 | 36 | M | +| 18 | 33 | M | ++----------------+-----+--------+ +``` + +## Example 5: Using IN Operator + +The example demonstrates using IN operator to match multiple values. + +```ppl +source=accounts +| where state IN ('IL', 'VA') +| fields account_number, state +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------+-------+ +| account_number | state | +|----------------+-------| +| 1 | IL | +| 13 | VA | ++----------------+-------+ +``` + +## Example 6: NULL Checks + +The example shows how to filter records with NULL values. + +```ppl +source=accounts +| where ISNULL(employer) +| fields account_number, employer +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+----------+ +| account_number | employer | +|----------------+----------| +| 18 | null | ++----------------+----------+ +``` + +## Example 7: Complex Conditions + +The example demonstrates combining multiple conditions with parentheses and logical operators. 
+ +```ppl +source=accounts +| where (balance > 40000 OR age > 35) AND gender = 'M' +| fields account_number, balance, age, gender +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+---------+-----+--------+ +| account_number | balance | age | gender | +|----------------+---------+-----+--------| +| 6 | 5686 | 36 | M | ++----------------+---------+-----+--------+ +``` + +## Example 8: NOT Conditions + +The example shows how to use NOT operator to exclude matching records. + +```ppl +source=accounts +| where NOT state = 'CA' +| fields account_number, state +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+ +| account_number | state | +|----------------+-------| +| 1 | IL | +| 6 | TN | +| 13 | VA | +| 18 | MD | ++----------------+-------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/cmd/where.rst b/docs/user/ppl/cmd/where.rst deleted file mode 100644 index 324af4dcb54..00000000000 --- a/docs/user/ppl/cmd/where.rst +++ /dev/null @@ -1,165 +0,0 @@ -===== -where -===== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -=========== -| The ``where`` command filters the search result. The ``where`` command only returns the result when the bool-expression evaluates to true. - -Syntax -====== -where - -* bool-expression: optional. Any expression which could be evaluated to boolean value. - -Example 1: Filter result set with condition -=========================================== - -This example shows fetching all the documents from the accounts index where account_number is 1 or gender is "F". - -PPL query:: - - os> source=accounts | where account_number=1 or gender="F" | fields account_number, gender; - fetched rows / total rows = 2/2 - +----------------+--------+ - | account_number | gender | - |----------------+--------| - | 1 | M | - | 13 | F | - +----------------+--------+ - -Example 2: Basic Field Comparison -================================= - -The example shows how to filter accounts with balance greater than 30000. - -PPL query:: - - os> source=accounts | where balance > 30000 | fields account_number, balance; - fetched rows / total rows = 2/2 - +----------------+---------+ - | account_number | balance | - |----------------+---------| - | 1 | 39225 | - | 13 | 32838 | - +----------------+---------+ - -Example 3: Pattern Matching with LIKE -===================================== - -Pattern Matching with Underscore (_) - -The example demonstrates using LIKE with underscore (_) to match a single character. - -PPL query:: - - os> source=accounts | where LIKE(state, 'M_') | fields account_number, state; - fetched rows / total rows = 1/1 - +----------------+-------+ - | account_number | state | - |----------------+-------| - | 18 | MD | - +----------------+-------+ - -Pattern Matching with Percent (%) - -The example demonstrates using LIKE with percent (%) to match multiple characters. - -PPL query:: - - os> source=accounts | where LIKE(state, 'V%') | fields account_number, state; - fetched rows / total rows = 1/1 - +----------------+-------+ - | account_number | state | - |----------------+-------| - | 13 | VA | - +----------------+-------+ - -Example 4: Multiple Conditions -============================== - -The example shows how to combine multiple conditions using AND operator. 
- -PPL query:: - - os> source=accounts | where age > 30 AND gender = 'M' | fields account_number, age, gender; - fetched rows / total rows = 3/3 - +----------------+-----+--------+ - | account_number | age | gender | - |----------------+-----+--------| - | 1 | 32 | M | - | 6 | 36 | M | - | 18 | 33 | M | - +----------------+-----+--------+ - -Example 5: Using IN Operator -============================ - -The example demonstrates using IN operator to match multiple values. - -PPL query:: - - os> source=accounts | where state IN ('IL', 'VA') | fields account_number, state; - fetched rows / total rows = 2/2 - +----------------+-------+ - | account_number | state | - |----------------+-------| - | 1 | IL | - | 13 | VA | - +----------------+-------+ - -Example 6: NULL Checks -====================== - -The example shows how to filter records with NULL values. - -PPL query:: - - os> source=accounts | where ISNULL(employer) | fields account_number, employer; - fetched rows / total rows = 1/1 - +----------------+----------+ - | account_number | employer | - |----------------+----------| - | 18 | null | - +----------------+----------+ - -Example 7: Complex Conditions -============================= - -The example demonstrates combining multiple conditions with parentheses and logical operators. - -PPL query:: - - os> source=accounts | where (balance > 40000 OR age > 35) AND gender = 'M' | fields account_number, balance, age, gender; - fetched rows / total rows = 1/1 - +----------------+---------+-----+--------+ - | account_number | balance | age | gender | - |----------------+---------+-----+--------| - | 6 | 5686 | 36 | M | - +----------------+---------+-----+--------+ - -Example 8: NOT Conditions -========================= - -The example shows how to use NOT operator to exclude matching records. - -PPL query:: - - os> source=accounts | where NOT state = 'CA' | fields account_number, state; - fetched rows / total rows = 4/4 - +----------------+-------+ - | account_number | state | - |----------------+-------| - | 1 | IL | - | 6 | TN | - | 13 | VA | - | 18 | MD | - +----------------+-------+ - diff --git a/docs/user/ppl/functions/aggregations.md b/docs/user/ppl/functions/aggregations.md new file mode 100644 index 00000000000..c11a7687cb8 --- /dev/null +++ b/docs/user/ppl/functions/aggregations.md @@ -0,0 +1,653 @@ +# Aggregation Functions + +## Description + +Aggregation functions perform calculations across multiple rows to return a single result value. These functions are used with `stats` and `eventstats` commands to analyze and summarize data. +The following table shows how NULL/MISSING values are handled by aggregation functions: + +| Function | NULL | MISSING | +| --- | --- | --- | +| COUNT | Not counted | Not counted | +| SUM | Ignore | Ignore | +| AVG | Ignore | Ignore | +| MAX | Ignore | Ignore | +| MIN | Ignore | Ignore | +| FIRST | Ignore | Ignore | +| LAST | Ignore | Ignore | +| LIST | Ignore | Ignore | +| VALUES | Ignore | Ignore | + +## Functions + +### COUNT + +#### Description + +Usage: Returns a count of the number of expr in the rows retrieved. The `C()` function, `c`, and `count` can be used as abbreviations for `COUNT()`. To perform a filtered counting, wrap the condition to satisfy in an `eval` expression. 
+Example + +```ppl +source=accounts +| stats count(), c(), count, c +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+-----+-------+---+ +| count() | c() | count | c | +|---------+-----+-------+---| +| 4 | 4 | 4 | 4 | ++---------+-----+-------+---+ +``` + +Example of filtered counting + +```ppl +source=accounts +| stats count(eval(age > 30)) as mature_users +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| mature_users | +|--------------| +| 3 | ++--------------+ +``` + +### SUM + +#### Description + +Usage: SUM(expr). Returns the sum of expr. +Example + +```ppl +source=accounts +| stats sum(age) by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------+--------+ +| sum(age) | gender | +|----------+--------| +| 28 | F | +| 101 | M | ++----------+--------+ +``` + +### AVG + +#### Description + +Usage: AVG(expr). Returns the average value of expr. +Example + +```ppl +source=accounts +| stats avg(age) by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++--------------------+--------+ +| avg(age) | gender | +|--------------------+--------| +| 28.0 | F | +| 33.666666666666664 | M | ++--------------------+--------+ +``` + +### MAX + +#### Description + +Usage: MAX(expr). Returns the maximum value of expr. +For non-numeric fields, values are sorted lexicographically. +Example + +```ppl +source=accounts +| stats max(age) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------+ +| max(age) | +|----------| +| 36 | ++----------+ +``` + +Example with text field + +```ppl +source=accounts +| stats max(firstname) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+ +| max(firstname) | +|----------------| +| Nanette | ++----------------+ +``` + +### MIN + +#### Description + +Usage: MIN(expr). Returns the minimum value of expr. +For non-numeric fields, values are sorted lexicographically. +Example + +```ppl +source=accounts +| stats min(age) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------+ +| min(age) | +|----------| +| 28 | ++----------+ +``` + +Example with text field + +```ppl +source=accounts +| stats min(firstname) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+ +| min(firstname) | +|----------------| +| Amber | ++----------------+ +``` + +### VAR_SAMP + +#### Description + +Usage: VAR_SAMP(expr). Returns the sample variance of expr. +Example + +```ppl +source=accounts +| stats var_samp(age) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------+ +| var_samp(age) | +|--------------------| +| 10.916666666666666 | ++--------------------+ +``` + +### VAR_POP + +#### Description + +Usage: VAR_POP(expr). Returns the population standard variance of expr. +Example + +```ppl +source=accounts +| stats var_pop(age) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| var_pop(age) | +|--------------| +| 8.1875 | ++--------------+ +``` + +### STDDEV_SAMP + +#### Description + +Usage: STDDEV_SAMP(expr). Return the sample standard deviation of expr. +Example + +```ppl +source=accounts +| stats stddev_samp(age) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------------+ +| stddev_samp(age) | +|-------------------| +| 3.304037933599835 | ++-------------------+ +``` + +### STDDEV_POP + +#### Description + +Usage: STDDEV_POP(expr). 
Return the population standard deviation of expr. +Example + +```ppl +source=accounts +| stats stddev_pop(age) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------+ +| stddev_pop(age) | +|--------------------| +| 2.8613807855648994 | ++--------------------+ +``` + +### DISTINCT_COUNT, DC + +#### Description + +Usage: DISTINCT_COUNT(expr), DC(expr). Returns the approximate number of distinct values using the HyperLogLog++ algorithm. Both functions are equivalent. +For details on algorithm accuracy and precision control, see the [OpenSearch Cardinality Aggregation documentation](https://docs.opensearch.org/latest/aggregations/metric/cardinality/#controlling-precision). +Example + +```ppl +source=accounts +| stats dc(state) as distinct_states, distinct_count(state) as dc_states_alt by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++-----------------+---------------+--------+ +| distinct_states | dc_states_alt | gender | +|-----------------+---------------+--------| +| 1 | 1 | F | +| 3 | 3 | M | ++-----------------+---------------+--------+ +``` + +### DISTINCT_COUNT_APPROX + +#### Description + +Usage: DISTINCT_COUNT_APPROX(expr). Return the approximate distinct count value of the expr, using the hyperloglog++ algorithm. +Example + +```ppl +source=accounts +| stats distinct_count_approx(gender) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------------------------+ +| distinct_count_approx(gender) | +|-------------------------------| +| 2 | ++-------------------------------+ +``` + +### EARLIEST + +#### Description + +Usage: EARLIEST(field [, time_field]). Return the earliest value of a field based on timestamp ordering. +* field: mandatory. The field to return the earliest value for. +* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. + +Example + +```ppl +source=events +| stats earliest(message) by host +| sort host +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++-------------------+---------+ +| earliest(message) | host | +|-------------------+---------| +| Starting up | server1 | +| Initializing | server2 | ++-------------------+---------+ +``` + +Example with custom time field + +```ppl +source=events +| stats earliest(status, event_time) by category +| sort category +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++------------------------------+----------+ +| earliest(status, event_time) | category | +|------------------------------+----------| +| pending | orders | +| active | users | ++------------------------------+----------+ +``` + +### LATEST + +#### Description + +Usage: LATEST(field [, time_field]). Return the latest value of a field based on timestamp ordering. +* field: mandatory. The field to return the latest value for. +* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. 
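+
+As a supplementary sketch (an editorial addition, not one of the original examples), `EARLIEST` and `LATEST` can be paired in a single `stats` call to bracket each host's first and last message. It assumes the same `events` dataset as the examples below; the output is omitted because it depends on the data:
+
+```ppl
+source=events
+| stats earliest(message) as first_message, latest(message) as last_message by host
+| sort host
+```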
+ +Example + +```ppl +source=events +| stats latest(message) by host +| sort host +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++------------------+---------+ +| latest(message) | host | +|------------------+---------| +| Shutting down | server1 | +| Maintenance mode | server2 | ++------------------+---------+ +``` + +Example with custom time field + +```ppl +source=events +| stats latest(status, event_time) by category +| sort category +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++----------------------------+----------+ +| latest(status, event_time) | category | +|----------------------------+----------| +| cancelled | orders | +| inactive | users | ++----------------------------+----------+ +``` + +### TAKE + +#### Description + +Usage: TAKE(field [, size]). Return original values of a field. It does not guarantee on the order of values. +* field: mandatory. The field must be a text field. +* size: optional integer. The number of values should be returned. Default is 10. + +Example + +```ppl +source=accounts +| stats take(firstname) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------------------------+ +| take(firstname) | +|-----------------------------| +| [Amber,Hattie,Nanette,Dale] | ++-----------------------------+ +``` + +### PERCENTILE or PERCENTILE_APPROX + +#### Description + +Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Return the approximate percentile value of expr at the specified percentage. +* percent: The number must be a constant between 0 and 100. + +Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref [issue link](https://github.com/opensearch-project/OpenSearch/issues/18122). +Example + +```ppl +source=accounts +| stats percentile(age, 90) by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++---------------------+--------+ +| percentile(age, 90) | gender | +|---------------------+--------| +| 28 | F | +| 36 | M | ++---------------------+--------+ +``` + +#### Percentile Shortcut Functions + +For convenience, OpenSearch PPL provides shortcut functions for common percentiles: +- `PERC(expr)` - Equivalent to `PERCENTILE(expr, )` +- `P(expr)` - Equivalent to `PERCENTILE(expr, )` + +Both integer and decimal percentiles from 0 to 100 are supported (e.g., `PERC95`, `P99.5`). + +```ppl +source=accounts +| stats perc99.5(age); +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------+ +| perc99.5(age) | +|---------------| +| 36 | ++---------------+ +``` + +```ppl +source=accounts +| stats p50(age); +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------+ +| p50(age) | +|----------| +| 33 | ++----------+ +``` + +### MEDIAN + +#### Description + +Usage: MEDIAN(expr). Returns the median (50th percentile) value of `expr`. This is equivalent to `PERCENTILE(expr, 50)`. +Example + +```ppl +source=accounts +| stats median(age) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------+ +| median(age) | +|-------------| +| 33 | ++-------------+ +``` + +### FIRST + +#### Description + +Usage: FIRST(field). Return the first non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. +* field: mandatory. The field to return the first value for. 
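+
+As an illustrative sketch beyond the original examples (output omitted, since it depends on document order), `FIRST` pairs naturally with `LAST` to capture both ends of the natural ordering in one pass:
+
+```ppl
+source=accounts
+| stats first(firstname) as first_seen, last(firstname) as last_seen by gender
+```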
+ +Example + +```ppl +source=accounts +| stats first(firstname) by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++------------------+--------+ +| first(firstname) | gender | +|------------------+--------| +| Nanette | F | +| Amber | M | ++------------------+--------+ +``` + +### LAST + +#### Description + +Usage: LAST(field). Return the last non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. +* field: mandatory. The field to return the last value for. + +Example + +```ppl +source=accounts +| stats last(firstname) by gender +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++-----------------+--------+ +| last(firstname) | gender | +|-----------------+--------| +| Nanette | F | +| Dale | M | ++-----------------+--------+ +``` + +### LIST + +#### Description + +Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved. +The function returns up to 100 values with no guaranteed ordering. +* expr: The field expression to collect values from. +* This aggregation function doesn't support Array, Struct, Object field types. + +Example with string fields + +```ppl +source=accounts +| stats list(firstname) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------------------------+ +| list(firstname) | +|-----------------------------| +| [Amber,Hattie,Nanette,Dale] | ++-----------------------------+ +``` + +### VALUES + +#### Description + +Usage: VALUES(expr). Collects all unique values from the specified expression into a sorted array. Values are converted to strings, nulls are filtered, and duplicates are removed. +The maximum number of unique values returned is controlled by the `plugins.ppl.values.max.limit` setting: +* Default value is 0, which means unlimited values are returned +* Can be configured to any positive integer to limit the number of unique values +* See the [PPL Settings](../admin/settings.md#plugins-ppl-values-max-limit) documentation for more details + +Example with string fields + +```ppl +source=accounts +| stats values(firstname) +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------------------------+ +| values(firstname) | +|-----------------------------| +| [Amber,Dale,Hattie,Nanette] | ++-----------------------------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/aggregations.rst b/docs/user/ppl/functions/aggregations.rst deleted file mode 100644 index 6605bda0765..00000000000 --- a/docs/user/ppl/functions/aggregations.rst +++ /dev/null @@ -1,522 +0,0 @@ -===================== -Aggregation Functions -===================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Description -============ -| Aggregation functions perform calculations across multiple rows to return a single result value. These functions are used with ``stats`` and ``eventstats`` commands to analyze and summarize data. 
- -| The following table shows how NULL/MISSING values are handled by aggregation functions: - -+----------+-------------+-------------+ -| Function | NULL | MISSING | -+----------+-------------+-------------+ -| COUNT | Not counted | Not counted | -+----------+-------------+-------------+ -| SUM | Ignore | Ignore | -+----------+-------------+-------------+ -| AVG | Ignore | Ignore | -+----------+-------------+-------------+ -| MAX | Ignore | Ignore | -+----------+-------------+-------------+ -| MIN | Ignore | Ignore | -+----------+-------------+-------------+ -| FIRST | Ignore | Ignore | -+----------+-------------+-------------+ -| LAST | Ignore | Ignore | -+----------+-------------+-------------+ -| LIST | Ignore | Ignore | -+----------+-------------+-------------+ -| VALUES | Ignore | Ignore | -+----------+-------------+-------------+ - -Functions -========= - -COUNT ------ - -Description ->>>>>>>>>>> - -Usage: Returns a count of the number of expr in the rows retrieved. The ``C()`` function, ``c``, and ``count`` can be used as abbreviations for ``COUNT()``. To perform a filtered counting, wrap the condition to satisfy in an `eval` expression. - -Example:: - - os> source=accounts | stats count(), c(), count, c; - fetched rows / total rows = 1/1 - +---------+-----+-------+---+ - | count() | c() | count | c | - |---------+-----+-------+---| - | 4 | 4 | 4 | 4 | - +---------+-----+-------+---+ - -Example of filtered counting:: - - os> source=accounts | stats count(eval(age > 30)) as mature_users; - fetched rows / total rows = 1/1 - +--------------+ - | mature_users | - |--------------| - | 3 | - +--------------+ - -SUM ---- - -Description ->>>>>>>>>>> - -Usage: SUM(expr). Returns the sum of expr. - -Example:: - - os> source=accounts | stats sum(age) by gender; - fetched rows / total rows = 2/2 - +----------+--------+ - | sum(age) | gender | - |----------+--------| - | 28 | F | - | 101 | M | - +----------+--------+ - -AVG ---- - -Description ->>>>>>>>>>> - -Usage: AVG(expr). Returns the average value of expr. - -Example:: - - os> source=accounts | stats avg(age) by gender; - fetched rows / total rows = 2/2 - +--------------------+--------+ - | avg(age) | gender | - |--------------------+--------| - | 28.0 | F | - | 33.666666666666664 | M | - +--------------------+--------+ - -MAX ---- - -Description ->>>>>>>>>>> - -Usage: MAX(expr). Returns the maximum value of expr. - -For non-numeric fields, values are sorted lexicographically. - -Example:: - - os> source=accounts | stats max(age); - fetched rows / total rows = 1/1 - +----------+ - | max(age) | - |----------| - | 36 | - +----------+ - -Example with text field:: - - os> source=accounts | stats max(firstname); - fetched rows / total rows = 1/1 - +----------------+ - | max(firstname) | - |----------------| - | Nanette | - +----------------+ - -MIN ---- - -Description ->>>>>>>>>>> - -Usage: MIN(expr). Returns the minimum value of expr. - -For non-numeric fields, values are sorted lexicographically. - -Example:: - - os> source=accounts | stats min(age); - fetched rows / total rows = 1/1 - +----------+ - | min(age) | - |----------| - | 28 | - +----------+ - -Example with text field:: - - os> source=accounts | stats min(firstname); - fetched rows / total rows = 1/1 - +----------------+ - | min(firstname) | - |----------------| - | Amber | - +----------------+ - -VAR_SAMP --------- - -Description ->>>>>>>>>>> - -Usage: VAR_SAMP(expr). Returns the sample variance of expr. 
- -Example:: - - os> source=accounts | stats var_samp(age); - fetched rows / total rows = 1/1 - +--------------------+ - | var_samp(age) | - |--------------------| - | 10.916666666666666 | - +--------------------+ - -VAR_POP -------- - -Description ->>>>>>>>>>> - -Usage: VAR_POP(expr). Returns the population standard variance of expr. - -Example:: - - os> source=accounts | stats var_pop(age); - fetched rows / total rows = 1/1 - +--------------+ - | var_pop(age) | - |--------------| - | 8.1875 | - +--------------+ - -STDDEV_SAMP ------------ - -Description ->>>>>>>>>>> - -Usage: STDDEV_SAMP(expr). Return the sample standard deviation of expr. - -Example:: - - os> source=accounts | stats stddev_samp(age); - fetched rows / total rows = 1/1 - +-------------------+ - | stddev_samp(age) | - |-------------------| - | 3.304037933599835 | - +-------------------+ - -STDDEV_POP ----------- - -Description ->>>>>>>>>>> - -Usage: STDDEV_POP(expr). Return the population standard deviation of expr. - -Example:: - - os> source=accounts | stats stddev_pop(age); - fetched rows / total rows = 1/1 - +--------------------+ - | stddev_pop(age) | - |--------------------| - | 2.8613807855648994 | - +--------------------+ - -DISTINCT_COUNT, DC ------------------- - -Description ->>>>>>>>>>> - -Usage: DISTINCT_COUNT(expr), DC(expr). Returns the approximate number of distinct values using the HyperLogLog++ algorithm. Both functions are equivalent. - -For details on algorithm accuracy and precision control, see the `OpenSearch Cardinality Aggregation documentation `_. - -Example:: - - os> source=accounts | stats dc(state) as distinct_states, distinct_count(state) as dc_states_alt by gender; - fetched rows / total rows = 4/4 - +-----------------+---------------+--------+ - | distinct_states | dc_states_alt | gender | - |-----------------+---------------+--------| - | 3 | 3 | M | - | 1 | 1 | F | - +-----------------+---------------+--------| - -DISTINCT_COUNT_APPROX ---------------------- - -Description ->>>>>>>>>>> - -Usage: DISTINCT_COUNT_APPROX(expr). Return the approximate distinct count value of the expr, using the hyperloglog++ algorithm. - -Example:: - - PPL> source=accounts | stats distinct_count_approx(gender); - fetched rows / total rows = 1/1 - +-------------------------------+ - | distinct_count_approx(gender) | - |-------------------------------| - | 2 | - +-------------------------------+ - -EARLIEST --------- - -Description ->>>>>>>>>>> - -Usage: EARLIEST(field [, time_field]). Return the earliest value of a field based on timestamp ordering. - -* field: mandatory. The field to return the earliest value for. -* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -Example:: - - os> source=events | stats earliest(message) by host | sort host; - fetched rows / total rows = 2/2 - +-------------------+---------+ - | earliest(message) | host | - |-------------------+---------| - | Starting up | server1 | - | Initializing | server2 | - +-------------------+---------+ - -Example with custom time field:: - - os> source=events | stats earliest(status, event_time) by category | sort category; - fetched rows / total rows = 2/2 - +------------------------------+----------+ - | earliest(status, event_time) | category | - |------------------------------+----------| - | pending | orders | - | active | users | - +------------------------------+----------+ - -LATEST ------- - -Description ->>>>>>>>>>> - -Usage: LATEST(field [, time_field]). 
Return the latest value of a field based on timestamp ordering. - -* field: mandatory. The field to return the latest value for. -* time_field: optional. The field to use for time-based ordering. Defaults to @timestamp if not specified. - -Example:: - - os> source=events | stats latest(message) by host | sort host; - fetched rows / total rows = 2/2 - +------------------+---------+ - | latest(message) | host | - |------------------+---------| - | Shutting down | server1 | - | Maintenance mode | server2 | - +------------------+---------+ - -Example with custom time field:: - - os> source=events | stats latest(status, event_time) by category | sort category; - fetched rows / total rows = 2/2 - +----------------------------+----------+ - | latest(status, event_time) | category | - |----------------------------+----------| - | cancelled | orders | - | inactive | users | - +----------------------------+----------+ - -TAKE ----- - -Description ->>>>>>>>>>> - -Usage: TAKE(field [, size]). Return original values of a field. It does not guarantee on the order of values. - -* field: mandatory. The field must be a text field. -* size: optional integer. The number of values should be returned. Default is 10. - -Example:: - - os> source=accounts | stats take(firstname); - fetched rows / total rows = 1/1 - +-----------------------------+ - | take(firstname) | - |-----------------------------| - | [Amber,Hattie,Nanette,Dale] | - +-----------------------------+ - -PERCENTILE or PERCENTILE_APPROX -------------------------------- - -Description ->>>>>>>>>>> - -Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Return the approximate percentile value of expr at the specified percentage. - -* percent: The number must be a constant between 0 and 100. - -Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref `issue link `_. - -Example:: - - os> source=accounts | stats percentile(age, 90) by gender; - fetched rows / total rows = 2/2 - +---------------------+--------+ - | percentile(age, 90) | gender | - |---------------------+--------| - | 28 | F | - | 36 | M | - +---------------------+--------+ - -Percentile Shortcut Functions ->>>>>>>>>>>>>>>>>>>>>>>>>>>>> - -For convenience, OpenSearch PPL provides shortcut functions for common percentiles: - -- ``PERC(expr)`` - Equivalent to ``PERCENTILE(expr, )`` -- ``P(expr)`` - Equivalent to ``PERCENTILE(expr, )`` - -Both integer and decimal percentiles from 0 to 100 are supported (e.g., ``PERC95``, ``P99.5``). - -Example:: - - ppl> source=accounts | stats perc99.5(age); - fetched rows / total rows = 1/1 - +---------------+ - | perc99.5(age) | - |---------------| - | 36 | - +---------------+ - - ppl> source=accounts | stats p50(age); - fetched rows / total rows = 1/1 - +---------+ - | p50(age) | - |---------| - | 32 | - +---------+ - -MEDIAN ------- - -Description ->>>>>>>>>>> - -Usage: MEDIAN(expr). Returns the median (50th percentile) value of `expr`. This is equivalent to ``PERCENTILE(expr, 50)``. - -Example:: - - os> source=accounts | stats median(age); - fetched rows / total rows = 1/1 - +-------------+ - | median(age) | - |-------------| - | 33 | - +-------------+ - -FIRST ------ - -Description ->>>>>>>>>>> - -Usage: FIRST(field). Return the first non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. - -* field: mandatory. The field to return the first value for. 
- -Example:: - - os> source=accounts | stats first(firstname) by gender; - fetched rows / total rows = 2/2 - +------------------+--------+ - | first(firstname) | gender | - |------------------+--------| - | Nanette | F | - | Amber | M | - +------------------+--------+ - -LAST ----- - -Description ->>>>>>>>>>> - -Usage: LAST(field). Return the last non-null value of a field based on natural document order. Returns NULL if no records exist, or if all records have NULL values for the field. - -* field: mandatory. The field to return the last value for. - -Example:: - - os> source=accounts | stats last(firstname) by gender; - fetched rows / total rows = 2/2 - +-----------------+--------+ - | last(firstname) | gender | - |-----------------+--------| - | Nanette | F | - | Dale | M | - +-----------------+--------+ - -LIST ----- - -Description ->>>>>>>>>>> - -Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved. -The function returns up to 100 values with no guaranteed ordering. - -* expr: The field expression to collect values from. -* This aggregation function doesn't support Array, Struct, Object field types. - -Example with string fields:: - - PPL> source=accounts | stats list(firstname); - fetched rows / total rows = 1/1 - +-------------------------------------+ - | list(firstname) | - |-------------------------------------| - | ["Amber","Hattie","Nanette","Dale"] | - +-------------------------------------+ - -VALUES ------- - -Description ->>>>>>>>>>> - -Usage: VALUES(expr). Collects all unique values from the specified expression into a sorted array. Values are converted to strings, nulls are filtered, and duplicates are removed. - -The maximum number of unique values returned is controlled by the ``plugins.ppl.values.max.limit`` setting: - -* Default value is 0, which means unlimited values are returned -* Can be configured to any positive integer to limit the number of unique values -* See the `PPL Settings <../admin/settings.rst#plugins-ppl-values-max-limit>`_ documentation for more details - -Example with string fields:: - - PPL> source=accounts | stats values(firstname); - fetched rows / total rows = 1/1 - +-------------------------------------+ - | values(firstname) | - |-------------------------------------| - | ["Amber","Dale","Hattie","Nanette"] | - +-------------------------------------+ \ No newline at end of file diff --git a/docs/user/ppl/functions/collection.md b/docs/user/ppl/functions/collection.md new file mode 100644 index 00000000000..3c004a22107 --- /dev/null +++ b/docs/user/ppl/functions/collection.md @@ -0,0 +1,727 @@ +# PPL Collection Functions + +## ARRAY + +### Description + +Usage: `array(value1, value2, value3...)` create an array with input values. Currently we don't allow mixture types. We will infer a least restricted type, for example `array(1, "demo")` -> ["1", "demo"] +Argument type: value1: ANY, value2: ANY, ... 
+Return type: ARRAY
+
+Example
+
+```ppl
+source=people
+| eval array = array(1, 2, 3)
+| fields array
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------+
+| array   |
+|---------|
+| [1,2,3] |
++---------+
+```
+
+```ppl
+source=people
+| eval array = array(1, "demo")
+| fields array
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------+
+| array    |
+|----------|
+| [1,demo] |
++----------+
+```
+
+## ARRAY_LENGTH
+
+### Description
+
+Usage: `array_length(array)` returns the length of the input array.
+
+Argument type: array:ARRAY
+
+Return type: INTEGER
+
+Example
+
+```ppl
+source=people
+| eval array = array(1, 2, 3)
+| eval length = array_length(array)
+| fields length
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| length |
+|--------|
+| 3      |
++--------+
```
+
+## FORALL
+
+### Description
+
+Usage: `forall(array, function)` checks whether every element of the array satisfies the lambda function. The lambda must return a boolean and accepts a single argument (the element).
+
+Argument type: array:ARRAY, function:LAMBDA
+
+Return type: BOOLEAN
+
+Example
+
+```ppl
+source=people
+| eval array = array(1, 2, 3), result = forall(array, x -> x > 0)
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| result |
+|--------|
+| True   |
++--------+
+```
+
+## EXISTS
+
+### Description
+
+Usage: `exists(array, function)` checks whether at least one element of the array satisfies the lambda function. The lambda must return a boolean and accepts a single argument (the element).
+
+Argument type: array:ARRAY, function:LAMBDA
+
+Return type: BOOLEAN
+
+Example
+
+```ppl
+source=people
+| eval array = array(-1, -2, 3), result = exists(array, x -> x > 0)
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| result |
+|--------|
+| True   |
++--------+
+```
+
+## FILTER
+
+### Description
+
+Usage: `filter(array, function)` keeps only the elements of the array that satisfy the lambda function. The lambda must return a boolean and accepts a single argument (the element).
+
+Argument type: array:ARRAY, function:LAMBDA
+
+Return type: ARRAY
+
+Example
+
+```ppl
+source=people
+| eval array = array(1, -2, 3), result = filter(array, x -> x > 0)
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| result |
+|--------|
+| [1,3]  |
++--------+
+```
+
+## TRANSFORM
+
+### Description
+
+Usage: `transform(array, function)` applies the lambda function to each element of the array. The lambda accepts either a single argument (the element) or two arguments, in which case the second argument is the element's index within the array.
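+
+As a hypothetical composition (an editorial sketch, not part of the original patch; output omitted), the lambda functions on this page chain together, for example keeping the positive elements with `filter` and then scaling them with `transform`:
+
+```ppl
+source=people
+| eval result = transform(filter(array(1, -2, 3), x -> x > 0), x -> x * 10)
+| fields result
+| head 1
+```
+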
+Argument type: array:ARRAY, function:LAMBDA
+
+Return type: ARRAY
+
+Example
+
+```ppl
+source=people
+| eval array = array(1, -2, 3), result = transform(array, x -> x + 2)
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------+
+| result  |
+|---------|
+| [3,0,5] |
++---------+
+```
+
+```ppl
+source=people
+| eval array = array(1, -2, 3), result = transform(array, (x, i) -> x + i)
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------+
+| result   |
+|----------|
+| [1,-1,5] |
++----------+
+```
+
+## REDUCE
+
+### Description
+
+Usage: `reduce(array, acc_base, function [, reduce_function])` folds the array into a single value. Starting from `acc_base`, the lambda function is applied to the accumulator and each array element in turn; it accepts two arguments, the accumulator and the current element. If the optional `reduce_function` is supplied, it is applied to the final accumulator and accepts the accumulator as its single argument.
+
+Argument type: array:ARRAY, acc_base:ANY, function:LAMBDA, reduce_function:LAMBDA (optional)
+
+Return type: ANY
+
+Example
+
+```ppl
+source=people
+| eval array = array(1, -2, 3), result = reduce(array, 10, (acc, x) -> acc + x)
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| result |
+|--------|
+| 12     |
++--------+
+```
+
+```ppl
+source=people
+| eval array = array(1, -2, 3), result = reduce(array, 10, (acc, x) -> acc + x, acc -> acc * 10)
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| result |
+|--------|
+| 120    |
++--------+
+```
+
+## MVJOIN
+
+### Description
+
+Usage: mvjoin(array, delimiter) joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported.
+
+Argument type: array: ARRAY of STRING, delimiter: STRING
+
+Return type: STRING
+
+Example
+
+```ppl
+source=people
+| eval result = mvjoin(array('a', 'b', 'c'), ',')
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+
+| result |
+|--------|
+| a,b,c  |
++--------+
+```
+
+```ppl
+source=accounts
+| eval names_array = array(firstname, lastname)
+| eval result = mvjoin(names_array, ', ')
+| fields result
+| head 1
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------+
+| result      |
+|-------------|
+| Amber, Duke |
++-------------+
+```
+
+## MVAPPEND
+
+### Description
+
+Usage: mvappend(value1, value2, value3...) appends all elements from its arguments into a single array. Array arguments are flattened and their individual elements collected. It always returns an array (or null) for consistent type behavior.
+
+Argument type: value1: ANY, value2: ANY, ...
+Return type: ARRAY +Example + +```ppl +source=people +| eval result = mvappend(1, 1, 3) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+ +| result | +|---------| +| [1,1,3] | ++---------+ +``` + +```ppl +source=people +| eval result = mvappend(1, array(2, 3)) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+ +| result | +|---------| +| [1,2,3] | ++---------+ +``` + +```ppl +source=people +| eval result = mvappend(mvappend(1, 2), 3) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+ +| result | +|---------| +| [1,2,3] | ++---------+ +``` + +```ppl +source=people +| eval result = mvappend(42) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| [42] | ++--------+ +``` + +```ppl +source=people +| eval result = mvappend(nullif(1, 1), 2) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| [2] | ++--------+ +``` + +```ppl +source=people +| eval result = mvappend(nullif(1, 1)) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| null | ++--------+ +``` + +```ppl +source=people +| eval arr1 = array(1, 2), arr2 = array(3, 4), result = mvappend(arr1, arr2) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+ +| result | +|-----------| +| [1,2,3,4] | ++-----------+ +``` + +```ppl +source=accounts +| eval result = mvappend(firstname, lastname) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| result | +|--------------| +| [Amber,Duke] | ++--------------+ +``` + +```ppl +source=people +| eval result = mvappend(1, 'text', 2.5) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| result | +|--------------| +| [1,text,2.5] | ++--------------+ +``` + +## SPLIT + +### Description + +Usage: split(str, delimiter) splits the string values on the delimiter and returns the string values as a multivalue field (array). Use an empty string ("") to split the original string into one value per character. If the delimiter is not found, returns an array containing the original string. If the input string is empty, returns an empty array. 
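+
+As a brief illustration (an editorial addition; output omitted), `split` composes with the other array functions on this page, for example counting the tokens of a delimited string with `array_length`:
+
+```ppl
+source=people
+| eval tokens = split('a,b,c', ','), n = array_length(tokens)
+| fields n
+| head 1
+```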
+ +Argument type: str: STRING, delimiter: STRING + +Return type: ARRAY of STRING + +### Example + +```ppl +source=people +| eval test = 'buttercup;rarity;tenderhoof;dash', result = split(test, ';') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------------------------------------+ +| result | +|------------------------------------| +| [buttercup,rarity,tenderhoof,dash] | ++------------------------------------+ +``` + +```ppl +source=people +| eval test = '1a2b3c4def567890', result = split(test, 'def') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------------------+ +| result | +|------------------| +| [1a2b3c4,567890] | ++------------------+ +``` + +```ppl +source=people +| eval test = 'abcd', result = split(test, '') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+ +| result | +|-----------| +| [a,b,c,d] | ++-----------+ +``` + +```ppl +source=people +| eval test = 'name::value', result = split(test, '::') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+ +| result | +|--------------| +| [name,value] | ++--------------+ +``` + +```ppl +source=people +| eval test = 'hello', result = split(test, ',') +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+ +| result | +|---------| +| [hello] | ++---------+ +``` + +## MVDEDUP + +### Description + +Usage: mvdedup(array) removes duplicate values from a multivalue array while preserving the order of first occurrence. NULL elements are filtered out. Returns an array with duplicates removed, or null if the input is null. +Argument type: array: ARRAY +Return type: ARRAY +Example + +```ppl +source=people +| eval array = array(1, 2, 2, 3, 1, 4), result = mvdedup(array) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+ +| result | +|-----------| +| [1,2,3,4] | ++-----------+ +``` + +```ppl +source=people +| eval array = array('z', 'a', 'z', 'b', 'a', 'c'), result = mvdedup(array) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+ +| result | +|-----------| +| [z,a,b,c] | ++-----------+ +``` + +```ppl +source=people +| eval array = array(), result = mvdedup(array) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| [] | ++--------+ +``` + +## MVINDEX + +### Description + +Usage: mvindex(array, start, [end]) returns a subset of the multivalue array using the start and optional end index values. Indexes are 0-based (first element is at index 0). Supports negative indexing where -1 refers to the last element. When only start is provided, returns a single element. When both start and end are provided, returns an array of elements from start to end (inclusive). 
+Argument type: array: ARRAY, start: INTEGER, end: INTEGER (optional) +Return type: ANY (single element) or ARRAY (range) +Example + +```ppl +source=people +| eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, 1) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| b | ++--------+ +``` + +```ppl +source=people +| eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, -1) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------+ +| result | +|--------| +| e | ++--------+ +``` + +```ppl +source=people +| eval array = array(1, 2, 3, 4, 5), result = mvindex(array, 1, 3) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+ +| result | +|---------| +| [2,3,4] | ++---------+ +``` + +```ppl +source=people +| eval array = array(1, 2, 3, 4, 5), result = mvindex(array, -3, -1) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------+ +| result | +|---------| +| [3,4,5] | ++---------+ +``` + +```ppl +source=people +| eval array = array('alex', 'celestino', 'claudia', 'david'), result = mvindex(array, 0, 2) +| fields result +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------+ +| result | +|--------------------------| +| [alex,celestino,claudia] | ++--------------------------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/collection.rst b/docs/user/ppl/functions/collection.rst deleted file mode 100644 index fdea75d3e81..00000000000 --- a/docs/user/ppl/functions/collection.rst +++ /dev/null @@ -1,450 +0,0 @@ -=========================== -PPL Collection Functions -=========================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -ARRAY ------ - -Description ->>>>>>>>>>> - -Usage: ``array(value1, value2, value3...)`` create an array with input values. Currently we don't allow mixture types. We will infer a least restricted type, for example ``array(1, "demo")`` -> ["1", "demo"] - -Argument type: value1: ANY, value2: ANY, ... - -Return type: ARRAY - -Example:: - - os> source=people | eval array = array(1, 2, 3) | fields array | head 1 - fetched rows / total rows = 1/1 - +---------+ - | array | - |---------| - | [1,2,3] | - +---------+ - - os> source=people | eval array = array(1, "demo") | fields array | head 1 - fetched rows / total rows = 1/1 - +----------+ - | array | - |----------| - | [1,demo] | - +----------+ - -ARRAY_LENGTH ------------- - -Description ->>>>>>>>>>> - -Usage: ``array_length(array)`` returns the length of input array. - -Argument type: array:ARRAY - -Return type: INTEGER - -Example:: - - os> source=people | eval array = array(1, 2, 3) | eval length = array_length(array) | fields length | head 1 - fetched rows / total rows = 1/1 - +--------+ - | length | - |--------| - | 3 | - +--------+ - -FORALL ------- - -Description ->>>>>>>>>>> - -Usage: ``forall(array, function)`` check whether all element inside array can meet the lambda function. The function should also return boolean. The lambda function accepts one single input. 
- -Argument type: array:ARRAY, function:LAMBDA - -Return type: BOOLEAN - -Example:: - - os> source=people | eval array = array(1, 2, 3), result = forall(array, x -> x > 0) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | True | - +--------+ - -EXISTS ------- - -Description ->>>>>>>>>>> - -Usage: ``exists(array, function)`` check whether existing one of element inside array can meet the lambda function. The function should also return boolean. The lambda function accepts one single input. - -Argument type: array:ARRAY, function:LAMBDA - -Return type: BOOLEAN - -Example:: - - os> source=people | eval array = array(-1, -2, 3), result = exists(array, x -> x > 0) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | True | - +--------+ - -FILTER ------- - -Description ->>>>>>>>>>> - -Usage: ``filter(array, function)`` filter the element in the array by the lambda function. The function should return boolean. The lambda function accepts one single input. - -Argument type: array:ARRAY, function:LAMBDA - -Return type: ARRAY - -Example:: - - os> source=people | eval array = array(1, -2, 3), result = filter(array, x -> x > 0) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | [1,3] | - +--------+ - -TRANSFORM ---------- - -Description ->>>>>>>>>>> - -Usage: ``transform(array, function)`` transform the element of array one by one using lambda. The lambda function can accept one single input or two input. If the lambda accepts two argument, the second one is the index of element in array. - -Argument type: array:ARRAY, function:LAMBDA - -Return type: ARRAY - -Example:: - - os> source=people | eval array = array(1, -2, 3), result = transform(array, x -> x + 2) | fields result | head 1 - fetched rows / total rows = 1/1 - +---------+ - | result | - |---------| - | [3,0,5] | - +---------+ - - os> source=people | eval array = array(1, -2, 3), result = transform(array, (x, i) -> x + i) | fields result | head 1 - fetched rows / total rows = 1/1 - +----------+ - | result | - |----------| - | [1,-1,5] | - +----------+ - -REDUCE ------- - -Description ->>>>>>>>>>> - -Usage: ``reduce(array, acc_base, function, )`` use lambda function to go through all element and interact with acc_base. The lambda function accept two argument accumulator and array element. If add one more reduce_function, will apply reduce_function to accumulator finally. The reduce function accept accumulator as the one argument. - -Argument type: array:ARRAY, acc_base:ANY, function:LAMBDA, reduce_function:LAMBDA - -Return type: ANY - -Example:: - - os> source=people | eval array = array(1, -2, 3), result = reduce(array, 10, (acc, x) -> acc + x) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | 12 | - +--------+ - - os> source=people | eval array = array(1, -2, 3), result = reduce(array, 10, (acc, x) -> acc + x, acc -> acc * 10) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | 120 | - +--------+ - -SPLIT ------ - -Description ->>>>>>>>>>> - -Usage: split(str, delimiter) splits the string values on the delimiter and returns the string values as a multivalue field (array). Use an empty string ("") to split the original string into one value per character. If the delimiter is not found, returns an array containing the original string. If the input string is empty, returns an empty array. 
- -Argument type: str: STRING, delimiter: STRING - -Return type: ARRAY of STRING - -Example:: - - os> source=people | eval test = 'buttercup;rarity;tenderhoof;dash', result = split(test, ';') | fields result | head 1 - fetched rows / total rows = 1/1 - +------------------------------------+ - | result | - |------------------------------------| - | [buttercup,rarity,tenderhoof,dash] | - +------------------------------------+ - - os> source=people | eval test = '1a2b3c4def567890', result = split(test, 'def') | fields result | head 1 - fetched rows / total rows = 1/1 - +------------------+ - | result | - |------------------| - | [1a2b3c4,567890] | - +------------------+ - - os> source=people | eval test = 'abcd', result = split(test, '') | fields result | head 1 - fetched rows / total rows = 1/1 - +-----------+ - | result | - |-----------| - | [a,b,c,d] | - +-----------+ - - os> source=people | eval test = 'name::value', result = split(test, '::') | fields result | head 1 - fetched rows / total rows = 1/1 - +--------------+ - | result | - |--------------| - | [name,value] | - +--------------+ - - os> source=people | eval test = 'hello', result = split(test, ',') | fields result | head 1 - fetched rows / total rows = 1/1 - +---------+ - | result | - |---------| - | [hello] | - +---------+ - -MVJOIN ------- - -Description ->>>>>>>>>>> - -Usage: mvjoin(array, delimiter) joins string array elements into a single string, separated by the specified delimiter. NULL elements are excluded from the output. Only string arrays are supported. - -Argument type: array: ARRAY of STRING, delimiter: STRING - -Return type: STRING - -Example:: - - os> source=people | eval result = mvjoin(array('a', 'b', 'c'), ',') | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | a,b,c | - +--------+ - - os> source=accounts | eval names_array = array(firstname, lastname) | eval result = mvjoin(names_array, ', ') | fields result | head 1 - fetched rows / total rows = 1/1 - +-------------+ - | result | - |-------------| - | Amber, Duke | - +-------------+ - -MVAPPEND --------- - -Description ->>>>>>>>>>> - -Usage: mvappend(value1, value2, value3...) appends all elements from arguments to create an array. Flattens array arguments and collects all individual elements. Always returns an array or null for consistent type behavior. - -Argument type: value1: ANY, value2: ANY, ... 
- -Return type: ARRAY - -Example:: - - os> source=people | eval result = mvappend(1, 1, 3) | fields result | head 1 - fetched rows / total rows = 1/1 - +---------+ - | result | - |---------| - | [1,1,3] | - +---------+ - - os> source=people | eval result = mvappend(1, array(2, 3)) | fields result | head 1 - fetched rows / total rows = 1/1 - +---------+ - | result | - |---------| - | [1,2,3] | - +---------+ - - os> source=people | eval result = mvappend(mvappend(1, 2), 3) | fields result | head 1 - fetched rows / total rows = 1/1 - +---------+ - | result | - |---------| - | [1,2,3] | - +---------+ - - os> source=people | eval result = mvappend(42) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | [42] | - +--------+ - - os> source=people | eval result = mvappend(nullif(1, 1), 2) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | [2] | - +--------+ - - os> source=people | eval result = mvappend(nullif(1, 1)) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | null | - +--------+ - - os> source=people | eval arr1 = array(1, 2), arr2 = array(3, 4), result = mvappend(arr1, arr2) | fields result | head 1 - fetched rows / total rows = 1/1 - +-----------+ - | result | - |-----------| - | [1,2,3,4] | - +-----------+ - - os> source=accounts | eval result = mvappend(firstname, lastname) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------------+ - | result | - |--------------| - | [Amber,Duke] | - +--------------+ - - os> source=people | eval result = mvappend(1, 'text', 2.5) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------------+ - | result | - |--------------| - | [1,text,2.5] | - +--------------+ - -MVDEDUP -------- - -Description ->>>>>>>>>>> - -Usage: mvdedup(array) removes duplicate values from a multivalue array while preserving the order of first occurrence. NULL elements are filtered out. Returns an array with duplicates removed, or null if the input is null. - -Argument type: array: ARRAY - -Return type: ARRAY - -Example:: - - os> source=people | eval array = array(1, 2, 2, 3, 1, 4), result = mvdedup(array) | fields result | head 1 - fetched rows / total rows = 1/1 - +-----------+ - | result | - |-----------| - | [1,2,3,4] | - +-----------+ - - os> source=people | eval array = array('z', 'a', 'z', 'b', 'a', 'c'), result = mvdedup(array) | fields result | head 1 - fetched rows / total rows = 1/1 - +-----------+ - | result | - |-----------| - | [z,a,b,c] | - +-----------+ - - os> source=people | eval array = array(), result = mvdedup(array) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | [] | - +--------+ - -MVINDEX -------- - -Description ->>>>>>>>>>> - -Usage: mvindex(array, start, [end]) returns a subset of the multivalue array using the start and optional end index values. Indexes are 0-based (first element is at index 0). Supports negative indexing where -1 refers to the last element. When only start is provided, returns a single element. When both start and end are provided, returns an array of elements from start to end (inclusive). 
- -Argument type: array: ARRAY, start: INTEGER, end: INTEGER (optional) - -Return type: ANY (single element) or ARRAY (range) - -Example:: - - os> source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, 1) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | b | - +--------+ - - os> source=people | eval array = array('a', 'b', 'c', 'd', 'e'), result = mvindex(array, -1) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------+ - | result | - |--------| - | e | - +--------+ - - os> source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, 1, 3) | fields result | head 1 - fetched rows / total rows = 1/1 - +---------+ - | result | - |---------| - | [2,3,4] | - +---------+ - - os> source=people | eval array = array(1, 2, 3, 4, 5), result = mvindex(array, -3, -1) | fields result | head 1 - fetched rows / total rows = 1/1 - +---------+ - | result | - |---------| - | [3,4,5] | - +---------+ - - os> source=people | eval array = array('alex', 'celestino', 'claudia', 'david'), result = mvindex(array, 0, 2) | fields result | head 1 - fetched rows / total rows = 1/1 - +--------------------------+ - | result | - |--------------------------| - | [alex,celestino,claudia] | - +--------------------------+ - diff --git a/docs/user/ppl/functions/condition.md b/docs/user/ppl/functions/condition.md new file mode 100644 index 00000000000..8d65680fcda --- /dev/null +++ b/docs/user/ppl/functions/condition.md @@ -0,0 +1,803 @@ +# Condition Functions + +## ISNULL + +### Description + +Usage: isnull(field) returns TRUE if field is NULL, FALSE otherwise. +The `isnull()` function is commonly used: +- In `eval` expressions to create conditional fields +- With the `if()` function to provide default values +- In `where` clauses to filter null records + +Argument type: all the supported data types. +Return type: BOOLEAN +Example + +```ppl +source=accounts +| eval result = isnull(employer) +| fields result, employer, firstname +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------+----------+-----------+ +| result | employer | firstname | +|--------+----------+-----------| +| False | Pyrami | Amber | +| False | Netagy | Hattie | +| False | Quility | Nanette | +| True | null | Dale | ++--------+----------+-----------+ +``` + +Using with if() to label records + +```ppl +source=accounts +| eval status = if(isnull(employer), 'unemployed', 'employed') +| fields firstname, employer, status +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+----------+------------+ +| firstname | employer | status | +|-----------+----------+------------| +| Amber | Pyrami | employed | +| Hattie | Netagy | employed | +| Nanette | Quility | employed | +| Dale | null | unemployed | ++-----------+----------+------------+ +``` + +Filtering with where clause + +```ppl +source=accounts +| where isnull(employer) +| fields account_number, firstname, employer +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+-----------+----------+ +| account_number | firstname | employer | +|----------------+-----------+----------| +| 18 | Dale | null | ++----------------+-----------+----------+ +``` + +## ISNOTNULL + +### Description + +Usage: isnotnull(field) returns TRUE if field is NOT NULL, FALSE otherwise. 

The `isnotnull()` function is commonly used:
- In `eval` expressions to create boolean flags
- In `where` clauses to filter out null values
- With the `if()` function for conditional logic
- To validate data presence

Argument type: all the supported data types.
Return type: BOOLEAN
Synonyms: [ISPRESENT](#ispresent)
Example

```ppl
source=accounts
| eval has_employer = isnotnull(employer)
| fields firstname, employer, has_employer
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------+----------+--------------+
| firstname | employer | has_employer |
|-----------+----------+--------------|
| Amber     | Pyrami   | True         |
| Hattie    | Netagy   | True         |
| Nanette   | Quility  | True         |
| Dale      | null     | False        |
+-----------+----------+--------------+
```

Filtering with where clause

```ppl
source=accounts
| where not isnotnull(employer)
| fields account_number, employer
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+----------+
| account_number | employer |
|----------------+----------|
| 18             | null     |
+----------------+----------+
```

Using with if() for validation messages

```ppl
source=accounts
| eval validation = if(isnotnull(employer), 'valid', 'missing employer')
| fields firstname, employer, validation
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------+----------+------------------+
| firstname | employer | validation       |
|-----------+----------+------------------|
| Amber     | Pyrami   | valid            |
| Hattie    | Netagy   | valid            |
| Nanette   | Quility  | valid            |
| Dale      | null     | missing employer |
+-----------+----------+------------------+
```

## EXISTS

[Since OpenSearch doesn't differentiate null and missing](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html), functions such as ismissing/isnotmissing cannot be provided to test whether a field exists. You can still use isnull/isnotnull for that purpose.
For example, account 13 doesn't have an email field:

```ppl
source=accounts
| where isnull(email)
| fields account_number, email
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+-------+
| account_number | email |
|----------------+-------|
| 13             | null  |
+----------------+-------+
```

## IFNULL

### Description

Usage: ifnull(field1, field2) returns field2 if field1 is null.
Argument type: all the supported data types (NOTE: if the two parameters have different types, the semantic check will fail).
Return type: any
Example

```ppl
source=accounts
| eval result = ifnull(employer, 'default')
| fields result, employer, firstname
```

Expected output:

```text
fetched rows / total rows = 4/4
+---------+----------+-----------+
| result  | employer | firstname |
|---------+----------+-----------|
| Pyrami  | Pyrami   | Amber     |
| Netagy  | Netagy   | Hattie    |
| Quility | Quility  | Nanette   |
| default | null     | Dale      |
+---------+----------+-----------+
```

### Nested IFNULL Pattern

For OpenSearch versions prior to 3.1, COALESCE-like functionality can be achieved using nested IFNULL statements. This pattern is particularly useful in observability use cases where field names may vary across different data sources.

Usage: ifnull(field1, ifnull(field2, ifnull(field3, default_value)))
Example

```ppl
source=accounts
| eval result = ifnull(employer, ifnull(firstname, ifnull(lastname, "unknown")))
| fields result, employer, firstname, lastname
```

Expected output:

```text
fetched rows / total rows = 4/4
+---------+----------+-----------+----------+
| result  | employer | firstname | lastname |
|---------+----------+-----------+----------|
| Pyrami  | Pyrami   | Amber     | Duke     |
| Netagy  | Netagy   | Hattie    | Bond     |
| Quility | Quility  | Nanette   | Bates    |
| Dale    | null     | Dale      | Adams    |
+---------+----------+-----------+----------+
```

## NULLIF

### Description

Usage: nullif(field1, field2) returns null if the two parameters are the same; otherwise returns field1.
Argument type: all the supported data types (NOTE: if the two parameters have different types, the semantic check will fail).
Return type: any
Example

```ppl
source=accounts
| eval result = nullif(employer, 'Pyrami')
| fields result, employer, firstname
```

Expected output:

```text
fetched rows / total rows = 4/4
+---------+----------+-----------+
| result  | employer | firstname |
|---------+----------+-----------|
| null    | Pyrami   | Amber     |
| Netagy  | Netagy   | Hattie    |
| Quility | Quility  | Nanette   |
| null    | null     | Dale      |
+---------+----------+-----------+
```

## IF

### Description

Usage: if(condition, expr1, expr2) returns expr1 if condition is true, otherwise returns expr2.
Argument type: all the supported data types (NOTE: if expr1 and expr2 have different types, the semantic check will fail).
Return type: any
Example

```ppl
source=accounts
| eval result = if(true, firstname, lastname)
| fields result, firstname, lastname
```

Expected output:

```text
fetched rows / total rows = 4/4
+---------+-----------+----------+
| result  | firstname | lastname |
|---------+-----------+----------|
| Amber   | Amber     | Duke     |
| Hattie  | Hattie    | Bond     |
| Nanette | Nanette   | Bates    |
| Dale    | Dale      | Adams    |
+---------+-----------+----------+
```

```ppl
source=accounts
| eval result = if(false, firstname, lastname)
| fields result, firstname, lastname
```

Expected output:

```text
fetched rows / total rows = 4/4
+--------+-----------+----------+
| result | firstname | lastname |
|--------+-----------+----------|
| Duke   | Amber     | Duke     |
| Bond   | Hattie    | Bond     |
| Bates  | Nanette   | Bates    |
| Adams  | Dale      | Adams    |
+--------+-----------+----------+
```

```ppl
source=accounts
| eval is_vip = if(age > 30 AND isnotnull(employer), true, false)
| fields is_vip, firstname, lastname
```

Expected output:

```text
fetched rows / total rows = 4/4
+--------+-----------+----------+
| is_vip | firstname | lastname |
|--------+-----------+----------|
| True   | Amber     | Duke     |
| True   | Hattie    | Bond     |
| False  | Nanette   | Bates    |
| False  | Dale      | Adams    |
+--------+-----------+----------+
```

## CASE

### Description

Usage: case(condition1, expr1, condition2, expr2, ... conditionN, exprN else default) returns expr1 if condition1 is true, expr2 if condition2 is true, and so on; if no condition is true, returns the value of the ELSE clause. If the ELSE clause is not defined, returns NULL.
Argument type: all the supported data types (NOTE: there is no comma before "else").
+Return type: any +### Limitations + +When each condition is a field comparison with a numeric literal and each result expression is a string literal, the query will be optimized as [range aggregations](https://docs.opensearch.org/latest/aggregations/bucket/range) if pushdown optimization is enabled. However, this optimization has the following limitations: +- Null values will not be grouped into any bucket of a range aggregation and will be ignored +- The default ELSE clause will use the string literal `"null"` instead of actual NULL values + +Example + +```ppl +source=accounts +| eval result = case(age > 35, firstname, age < 30, lastname else employer) +| fields result, firstname, lastname, age, employer +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------+-----------+----------+-----+----------+ +| result | firstname | lastname | age | employer | +|--------+-----------+----------+-----+----------| +| Pyrami | Amber | Duke | 32 | Pyrami | +| Hattie | Hattie | Bond | 36 | Netagy | +| Bates | Nanette | Bates | 28 | Quility | +| null | Dale | Adams | 33 | null | ++--------+-----------+----------+-----+----------+ +``` + +```ppl +source=accounts +| eval result = case(age > 35, firstname, age < 30, lastname) +| fields result, firstname, lastname, age +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------+-----------+----------+-----+ +| result | firstname | lastname | age | +|--------+-----------+----------+-----| +| null | Amber | Duke | 32 | +| Hattie | Hattie | Bond | 36 | +| Bates | Nanette | Bates | 28 | +| null | Dale | Adams | 33 | ++--------+-----------+----------+-----+ +``` + +```ppl +source=accounts +| where true = case(age > 35, false, age < 30, false else true) +| fields firstname, lastname, age +``` + +Expected output: + +```text +fetched rows / total rows = 2/2 ++-----------+----------+-----+ +| firstname | lastname | age | +|-----------+----------+-----| +| Amber | Duke | 32 | +| Dale | Adams | 33 | ++-----------+----------+-----+ +``` + +## COALESCE + +### Description + +Usage: coalesce(field1, field2, ...) returns the first non-null, non-missing value in the argument list. +Argument type: all the supported data types. Supports mixed data types with automatic type coercion. 
+Return type: determined by the least restrictive common type among all arguments, with fallback to string if no common type can be determined +Behavior: +- Returns the first value that is not null and not missing (missing includes non-existent fields) +- Empty strings ("") and whitespace strings (" ") are considered valid values +- If all arguments are null or missing, returns null +- Automatic type coercion is applied to match the determined return type +- If type conversion fails, the value is converted to string representation +- For best results, use arguments of the same data type to avoid unexpected type conversions + +Performance Considerations: +- Optimized for multiple field evaluation, more efficient than nested IFNULL patterns +- Evaluates arguments sequentially, stopping at the first non-null value +- Consider field order based on likelihood of containing values to minimize evaluation overhead + +Limitations: +- Type coercion may result in unexpected string conversions for incompatible types +- Performance may degrade with very large numbers of arguments + +Example + +```ppl +source=accounts +| eval result = coalesce(employer, firstname, lastname) +| fields result, firstname, lastname, employer +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------+-----------+----------+----------+ +| result | firstname | lastname | employer | +|---------+-----------+----------+----------| +| Pyrami | Amber | Duke | Pyrami | +| Netagy | Hattie | Bond | Netagy | +| Quility | Nanette | Bates | Quility | +| Dale | Dale | Adams | null | ++---------+-----------+----------+----------+ +``` + +Empty String Handling Examples + +```ppl +source=accounts +| eval empty_field = "" +| eval result = coalesce(empty_field, firstname) +| fields result, empty_field, firstname +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------+-------------+-----------+ +| result | empty_field | firstname | +|--------+-------------+-----------| +| | | Amber | +| | | Hattie | +| | | Nanette | +| | | Dale | ++--------+-------------+-----------+ +``` + +```ppl +source=accounts +| eval result = coalesce(" ", firstname) +| fields result, firstname +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++--------+-----------+ +| result | firstname | +|--------+-----------| +| | Amber | +| | Hattie | +| | Nanette | +| | Dale | ++--------+-----------+ +``` + +Mixed Data Types with Auto Coercion + +```ppl +source=accounts +| eval result = coalesce(employer, balance, "fallback") +| fields result, employer, balance +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------+----------+---------+ +| result | employer | balance | +|---------+----------+---------| +| Pyrami | Pyrami | 39225 | +| Netagy | Netagy | 5686 | +| Quility | Quility | 32838 | +| 4180 | null | 4180 | ++---------+----------+---------+ +``` + +Non-existent Field Handling + +```ppl +source=accounts +| eval result = coalesce(nonexistent_field, firstname, "unknown") +| fields result, firstname +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++---------+-----------+ +| result | firstname | +|---------+-----------| +| Amber | Amber | +| Hattie | Hattie | +| Nanette | Nanette | +| Dale | Dale | ++---------+-----------+ +``` + +## ISPRESENT + +### Description + +Usage: ispresent(field) returns true if the field exists. +Argument type: all the supported data types. 

Return type: BOOLEAN
Synonyms: [ISNOTNULL](#isnotnull)
Example

```ppl
source=accounts
| where ispresent(employer)
| fields employer, firstname
```

Expected output:

```text
fetched rows / total rows = 3/3
+----------+-----------+
| employer | firstname |
|----------+-----------|
| Pyrami   | Amber     |
| Netagy   | Hattie    |
| Quility  | Nanette   |
+----------+-----------+
```

## ISBLANK

### Description

Usage: isblank(field) returns true if the field is null, an empty string, or contains only white space.
Argument type: all the supported data types.
Return type: BOOLEAN
Example

```ppl
source=accounts
| eval temp = ifnull(employer, ' ')
| eval `isblank(employer)` = isblank(employer), `isblank(temp)` = isblank(temp)
| fields `isblank(temp)`, temp, `isblank(employer)`, employer
```

Expected output:

```text
fetched rows / total rows = 4/4
+---------------+---------+-------------------+----------+
| isblank(temp) | temp    | isblank(employer) | employer |
|---------------+---------+-------------------+----------|
| False         | Pyrami  | False             | Pyrami   |
| False         | Netagy  | False             | Netagy   |
| False         | Quility | False             | Quility  |
| True          |         | True              | null     |
+---------------+---------+-------------------+----------+
```

## ISEMPTY

### Description

Usage: isempty(field) returns true if the field is null or is an empty string.
Argument type: all the supported data types.
Return type: BOOLEAN
Example

```ppl
source=accounts
| eval temp = ifnull(employer, ' ')
| eval `isempty(employer)` = isempty(employer), `isempty(temp)` = isempty(temp)
| fields `isempty(temp)`, temp, `isempty(employer)`, employer
```

Expected output:

```text
fetched rows / total rows = 4/4
+---------------+---------+-------------------+----------+
| isempty(temp) | temp    | isempty(employer) | employer |
|---------------+---------+-------------------+----------|
| False         | Pyrami  | False             | Pyrami   |
| False         | Netagy  | False             | Netagy   |
| False         | Quility | False             | Quility  |
| False         |         | True              | null     |
+---------------+---------+-------------------+----------+
```

## EARLIEST

### Description

Usage: earliest(relative_string, field) returns true if the value of field is after the timestamp derived from relative_string relative to the current time. Otherwise, returns false.
relative_string:
The relative string can be one of the following formats:
1. `"now"` or `"now()"`:

   Uses the current system time.
2. Absolute format (`MM/dd/yyyy:HH:mm:ss` or `yyyy-MM-dd HH:mm:ss`):

   Converts the string to a timestamp and compares it with the data.
3. Relative format: `(+|-)<time_amount><time_unit>[(+|-)<time_amount><time_unit>...][@<snap_time_unit>]`

   Steps to specify a relative time:
   - **a. Time offset:** Indicate the offset from the current time using `+` or `-`.
   - **b. Time amount:** Provide a numeric value followed by a time unit (`s`, `m`, `h`, `d`, `w`, `M`, `y`).
   - **c. Snap to unit:** Optionally specify a snap unit with `@` to round the result down to the nearest unit (e.g., hour, day, month).
+ + **Examples** (assuming current time is `2025-05-28 14:28:34`): + - `-3d+2y` → `2027-05-25 14:28:34` + - `+1d@m` → `2025-05-29 14:28:00` + - `-3M+1y@M` → `2026-02-01 00:00:00` + +Read more details [here](https://github.com/opensearch-project/opensearch-spark/blob/main/docs/ppl-lang/functions/ppl-datetime.md#relative_timestamp) +Argument type: relative_string:STRING, field: TIMESTAMP +Return type: BOOLEAN +Example + +```ppl +source=accounts +| eval now = utc_timestamp() +| eval a = earliest("now", now), b = earliest("-2d@d", now) +| fields a, b +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------+------+ +| a | b | +|-------+------| +| False | True | ++-------+------+ +``` + +```ppl +source=nyc_taxi +| where earliest('07/01/2014:00:30:00', timestamp) +| stats COUNT() as cnt +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----+ +| cnt | +|-----| +| 972 | ++-----+ +``` + +## LATEST + +### Description + +Usage: latest(relative_string, field) returns true if the value of field is before the timestamp derived from relative_string relative to the current time. Otherwise, returns false. +Argument type: relative_string:STRING, field: TIMESTAMP +Return type: BOOLEAN +Example + +```ppl +source=accounts +| eval now = utc_timestamp() +| eval a = latest("now", now), b = latest("+2d@d", now) +| fields a, b +| head 1 +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------+------+ +| a | b | +|------+------| +| True | True | ++------+------+ +``` + +```ppl +source=nyc_taxi +| where latest('07/21/2014:04:00:00', timestamp) +| stats COUNT() as cnt +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----+ +| cnt | +|-----| +| 969 | ++-----+ +``` + +## REGEXP_MATCH + +### Description + +Usage: regexp_match(string, pattern) returns true if the regular expression pattern finds a match against any substring of the string value, otherwise returns false. +The function uses Java regular expression syntax for the pattern. 
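Because the pattern is matched against any substring, anchor it with `^` and `$` when a full-string match is required, following the Java regular expression syntax noted above. A minimal illustrative sketch (the `logs` index and `level` field are hypothetical, and this is not a verified doctest):

```ppl ignore
source=logs
| where regexp_match(level, '^ERROR$')
| fields timestamp, level
```

Without the anchors, the pattern `'ERROR'` would also match values such as `'ERROR_RETRYABLE'`.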
+Argument type: STRING, STRING +Return type: BOOLEAN +Example + +``` ppl ignore +source=logs | where regexp_match(message, 'ERROR|WARN|FATAL') | fields timestamp, message +``` + +```text +fetched rows / total rows = 3/100 ++---------------------+------------------------------------------+ +| timestamp | message | +|---------------------+------------------------------------------| +| 2024-01-15 10:23:45 | ERROR: Connection timeout to database | +| 2024-01-15 10:24:12 | WARN: High memory usage detected | +| 2024-01-15 10:25:33 | FATAL: System crashed unexpectedly | ++---------------------+------------------------------------------+ +``` + +``` ppl ignore +source=users | where regexp_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') | fields name, email +``` + +```text +fetched rows / total rows = 2/3 ++-------+----------------------+ +| name | email | +|-------+----------------------| +| John | john@example.com | +| Alice | alice@company.org | ++-------+----------------------+ +``` + +```ppl ignore +source=network | where regexp_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regexp_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') | fields ip_address, status +``` + +```text +fetched rows / total rows = 2/10 ++---------------+--------+ +| ip_address | status | +|---------------+--------| +| 8.8.8.8 | active | +| 1.1.1.1 | active | ++---------------+--------+ +``` + +```ppl ignore +source=products | eval category = if(regexp_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regexp_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) | fields name, category +``` + +```text +fetched rows / total rows = 4/4 ++------------------------+----------+ +| name | category | +|------------------------+----------| +| Dell Laptop XPS | Computing| +| iPhone 15 Pro | Mobile | +| Wireless Mouse | Other | +| Desktop Computer Tower | Computing| ++------------------------+----------+ +``` \ No newline at end of file diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst deleted file mode 100644 index 0364fa7b116..00000000000 --- a/docs/user/ppl/functions/condition.rst +++ /dev/null @@ -1,615 +0,0 @@ -=================== -Condition Functions -=================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -ISNULL ------- - -Description ->>>>>>>>>>> - -Usage: isnull(field) returns TRUE if field is NULL, FALSE otherwise. - -The `isnull()` function is commonly used: -- In `eval` expressions to create conditional fields -- With the `if()` function to provide default values -- In `where` clauses to filter null records - -Argument type: all the supported data types. 
- -Return type: BOOLEAN - -Example:: - - os> source=accounts | eval result = isnull(employer) | fields result, employer, firstname - fetched rows / total rows = 4/4 - +--------+----------+-----------+ - | result | employer | firstname | - |--------+----------+-----------| - | False | Pyrami | Amber | - | False | Netagy | Hattie | - | False | Quility | Nanette | - | True | null | Dale | - +--------+----------+-----------+ - -Using with if() to label records:: - - os> source=accounts | eval status = if(isnull(employer), 'unemployed', 'employed') | fields firstname, employer, status - fetched rows / total rows = 4/4 - +-----------+----------+------------+ - | firstname | employer | status | - |-----------+----------+------------| - | Amber | Pyrami | employed | - | Hattie | Netagy | employed | - | Nanette | Quility | employed | - | Dale | null | unemployed | - +-----------+----------+------------+ - -Filtering with where clause:: - - os> source=accounts | where isnull(employer) | fields account_number, firstname, employer - fetched rows / total rows = 1/1 - +----------------+-----------+----------+ - | account_number | firstname | employer | - |----------------+-----------+----------| - | 18 | Dale | null | - +----------------+-----------+----------+ - -ISNOTNULL ---------- - -Description ->>>>>>>>>>> - -Usage: isnotnull(field) returns TRUE if field is NOT NULL, FALSE otherwise. - -The `isnotnull()` function is commonly used: -- In `eval` expressions to create boolean flags -- In `where` clauses to filter out null values -- With the `if()` function for conditional logic -- To validate data presence - -Argument type: all the supported data types. - -Return type: BOOLEAN - -Synonyms: `ISPRESENT`_ - -Example:: - - os> source=accounts | eval has_employer = isnotnull(employer) | fields firstname, employer, has_employer - fetched rows / total rows = 4/4 - +-----------+----------+--------------+ - | firstname | employer | has_employer | - |-----------+----------+--------------| - | Amber | Pyrami | True | - | Hattie | Netagy | True | - | Nanette | Quility | True | - | Dale | null | False | - +-----------+----------+--------------+ - -Filtering with where clause:: - - os> source=accounts | where not isnotnull(employer) | fields account_number, employer - fetched rows / total rows = 1/1 - +----------------+----------+ - | account_number | employer | - |----------------+----------| - | 18 | null | - +----------------+----------+ - -Using with if() for validation messages:: - - os> source=accounts | eval validation = if(isnotnull(employer), 'valid', 'missing employer') | fields firstname, employer, validation - fetched rows / total rows = 4/4 - +-----------+----------+------------------+ - | firstname | employer | validation | - |-----------+----------+------------------| - | Amber | Pyrami | valid | - | Hattie | Netagy | valid | - | Nanette | Quility | valid | - | Dale | null | missing employer | - +-----------+----------+------------------+ - -EXISTS ------- - -`Since OpenSearch doesn't differentiate null and missing `_, we can't provide functions like ismissing/isnotmissing to test if a field exists or not. But you can still use isnull/isnotnull for such purpose. 
- -Example, the account 13 doesn't have email field:: - - os> source=accounts | where isnull(email) | fields account_number, email - fetched rows / total rows = 1/1 - +----------------+-------+ - | account_number | email | - |----------------+-------| - | 13 | null | - +----------------+-------+ - -IFNULL ------- - -Description ->>>>>>>>>>> - -Usage: ifnull(field1, field2) returns field2 if field1 is null. - -Argument type: all the supported data types (NOTE : if two parameters have different types, you will fail semantic check). - -Return type: any - -Example:: - - os> source=accounts | eval result = ifnull(employer, 'default') | fields result, employer, firstname - fetched rows / total rows = 4/4 - +---------+----------+-----------+ - | result | employer | firstname | - |---------+----------+-----------| - | Pyrami | Pyrami | Amber | - | Netagy | Netagy | Hattie | - | Quility | Quility | Nanette | - | default | null | Dale | - +---------+----------+-----------+ - -Nested IFNULL Pattern ->>>>>>>>>>>>>>>>>>>>> - -For OpenSearch versions prior to 3.1, COALESCE-like functionality can be achieved using nested IFNULL statements. This pattern is particularly useful in observability use cases where field names may vary across different data sources. - -Usage: ifnull(field1, ifnull(field2, ifnull(field3, default_value))) - -Example:: - - os> source=accounts | eval result = ifnull(employer, ifnull(firstname, ifnull(lastname, "unknown"))) | fields result, employer, firstname, lastname - fetched rows / total rows = 4/4 - +---------+----------+-----------+----------+ - | result | employer | firstname | lastname | - |---------+----------+-----------+----------| - | Pyrami | Pyrami | Amber | Duke | - | Netagy | Netagy | Hattie | Bond | - | Quility | Quility | Nanette | Bates | - | Dale | null | Dale | Adams | - +---------+----------+-----------+----------+ - -NULLIF ------- - -Description ->>>>>>>>>>> - -Usage: nullif(field1, field2) returns null if two parameters are same, otherwise returns field1. - -Argument type: all the supported data types (NOTE : if two parameters have different types, you will fail semantic check). - -Return type: any - -Example:: - - os> source=accounts | eval result = nullif(employer, 'Pyrami') | fields result, employer, firstname - fetched rows / total rows = 4/4 - +---------+----------+-----------+ - | result | employer | firstname | - |---------+----------+-----------| - | null | Pyrami | Amber | - | Netagy | Netagy | Hattie | - | Quility | Quility | Nanette | - | null | null | Dale | - +---------+----------+-----------+ - -IF ------- - -Description ->>>>>>>>>>> - -Usage: if(condition, expr1, expr2) returns expr1 if condition is true, otherwise returns expr2. - -Argument type: all the supported data types (NOTE : if expr1 and expr2 are different types, you will fail semantic check). 
- -Return type: any - -Example:: - - os> source=accounts | eval result = if(true, firstname, lastname) | fields result, firstname, lastname - fetched rows / total rows = 4/4 - +---------+-----------+----------+ - | result | firstname | lastname | - |---------+-----------+----------| - | Amber | Amber | Duke | - | Hattie | Hattie | Bond | - | Nanette | Nanette | Bates | - | Dale | Dale | Adams | - +---------+-----------+----------+ - - os> source=accounts | eval result = if(false, firstname, lastname) | fields result, firstname, lastname - fetched rows / total rows = 4/4 - +--------+-----------+----------+ - | result | firstname | lastname | - |--------+-----------+----------| - | Duke | Amber | Duke | - | Bond | Hattie | Bond | - | Bates | Nanette | Bates | - | Adams | Dale | Adams | - +--------+-----------+----------+ - - os> source=accounts | eval is_vip = if(age > 30 AND isnotnull(employer), true, false) | fields is_vip, firstname, lastname - fetched rows / total rows = 4/4 - +--------+-----------+----------+ - | is_vip | firstname | lastname | - |--------+-----------+----------| - | True | Amber | Duke | - | True | Hattie | Bond | - | False | Nanette | Bates | - | False | Dale | Adams | - +--------+-----------+----------+ - -CASE ------- - -Description ->>>>>>>>>>> - -Usage: case(condition1, expr1, condition2, expr2, ... conditionN, exprN else default) returns expr1 if condition1 is true, or returns expr2 if condition2 is true, ... if no condition is true, then returns the value of ELSE clause. If the ELSE clause is not defined, returns NULL. - -Argument type: all the supported data types (NOTE : there is no comma before "else"). - -Return type: any - -Limitations ->>>>>>>>>>> - -When each condition is a field comparison with a numeric literal and each result expression is a string literal, the query will be optimized as `range aggregations `_ if pushdown optimization is enabled. 
However, this optimization has the following limitations: - -- Null values will not be grouped into any bucket of a range aggregation and will be ignored -- The default ELSE clause will use the string literal ``"null"`` instead of actual NULL values - -Example:: - - os> source=accounts | eval result = case(age > 35, firstname, age < 30, lastname else employer) | fields result, firstname, lastname, age, employer - fetched rows / total rows = 4/4 - +--------+-----------+----------+-----+----------+ - | result | firstname | lastname | age | employer | - |--------+-----------+----------+-----+----------| - | Pyrami | Amber | Duke | 32 | Pyrami | - | Hattie | Hattie | Bond | 36 | Netagy | - | Bates | Nanette | Bates | 28 | Quility | - | null | Dale | Adams | 33 | null | - +--------+-----------+----------+-----+----------+ - - os> source=accounts | eval result = case(age > 35, firstname, age < 30, lastname) | fields result, firstname, lastname, age - fetched rows / total rows = 4/4 - +--------+-----------+----------+-----+ - | result | firstname | lastname | age | - |--------+-----------+----------+-----| - | null | Amber | Duke | 32 | - | Hattie | Hattie | Bond | 36 | - | Bates | Nanette | Bates | 28 | - | null | Dale | Adams | 33 | - +--------+-----------+----------+-----+ - - os> source=accounts | where true = case(age > 35, false, age < 30, false else true) | fields firstname, lastname, age - fetched rows / total rows = 2/2 - +-----------+----------+-----+ - | firstname | lastname | age | - |-----------+----------+-----| - | Amber | Duke | 32 | - | Dale | Adams | 33 | - +-----------+----------+-----+ - -COALESCE --------- - -Description ->>>>>>>>>>> - -Usage: coalesce(field1, field2, ...) returns the first non-null, non-missing value in the argument list. - -Argument type: all the supported data types. Supports mixed data types with automatic type coercion. 
- -Return type: determined by the least restrictive common type among all arguments, with fallback to string if no common type can be determined - -Behavior: - -- Returns the first value that is not null and not missing (missing includes non-existent fields) -- Empty strings ("") and whitespace strings (" ") are considered valid values -- If all arguments are null or missing, returns null -- Automatic type coercion is applied to match the determined return type -- If type conversion fails, the value is converted to string representation -- For best results, use arguments of the same data type to avoid unexpected type conversions - -Performance Considerations: - -- Optimized for multiple field evaluation, more efficient than nested IFNULL patterns -- Evaluates arguments sequentially, stopping at the first non-null value -- Consider field order based on likelihood of containing values to minimize evaluation overhead - -Limitations: - -- Type coercion may result in unexpected string conversions for incompatible types -- Performance may degrade with very large numbers of arguments - -Example:: - - os> source=accounts | eval result = coalesce(employer, firstname, lastname) | fields result, firstname, lastname, employer - fetched rows / total rows = 4/4 - +---------+-----------+----------+----------+ - | result | firstname | lastname | employer | - |---------+-----------+----------+----------| - | Pyrami | Amber | Duke | Pyrami | - | Netagy | Hattie | Bond | Netagy | - | Quility | Nanette | Bates | Quility | - | Dale | Dale | Adams | null | - +---------+-----------+----------+----------+ - -Empty String Handling Examples:: - - os> source=accounts | eval empty_field = "" | eval result = coalesce(empty_field, firstname) | fields result, empty_field, firstname - fetched rows / total rows = 4/4 - +--------+-------------+-----------+ - | result | empty_field | firstname | - |--------+-------------+-----------| - | | | Amber | - | | | Hattie | - | | | Nanette | - | | | Dale | - +--------+-------------+-----------+ - - os> source=accounts | eval result = coalesce(" ", firstname) | fields result, firstname - fetched rows / total rows = 4/4 - +--------+-----------+ - | result | firstname | - |--------+-----------| - | | Amber | - | | Hattie | - | | Nanette | - | | Dale | - +--------+-----------+ - -Mixed Data Types with Auto Coercion:: - - os> source=accounts | eval result = coalesce(employer, balance, "fallback") | fields result, employer, balance - fetched rows / total rows = 4/4 - +---------+----------+---------+ - | result | employer | balance | - |---------+----------+---------| - | Pyrami | Pyrami | 39225 | - | Netagy | Netagy | 5686 | - | Quility | Quility | 32838 | - | 4180 | null | 4180 | - +---------+----------+---------+ - -Non-existent Field Handling:: - - os> source=accounts | eval result = coalesce(nonexistent_field, firstname, "unknown") | fields result, firstname - fetched rows / total rows = 4/4 - +---------+-----------+ - | result | firstname | - |---------+-----------| - | Amber | Amber | - | Hattie | Hattie | - | Nanette | Nanette | - | Dale | Dale | - +---------+-----------+ - - -ISPRESENT ---------- - -Description ->>>>>>>>>>> - -Usage: ispresent(field) returns true if the field exists. - -Argument type: all the supported data types. 
- -Return type: BOOLEAN - -Synonyms: `ISNOTNULL`_ - -Example:: - - os> source=accounts | where ispresent(employer) | fields employer, firstname - fetched rows / total rows = 3/3 - +----------+-----------+ - | employer | firstname | - |----------+-----------| - | Pyrami | Amber | - | Netagy | Hattie | - | Quility | Nanette | - +----------+-----------+ - -ISBLANK -------- - -Description ->>>>>>>>>>> - -Usage: isblank(field) returns true if the field is null, an empty string, or contains only white space. - -Argument type: all the supported data types. - -Return type: BOOLEAN - -Example:: - - os> source=accounts | eval temp = ifnull(employer, ' ') | eval `isblank(employer)` = isblank(employer), `isblank(temp)` = isblank(temp) | fields `isblank(temp)`, temp, `isblank(employer)`, employer - fetched rows / total rows = 4/4 - +---------------+---------+-------------------+----------+ - | isblank(temp) | temp | isblank(employer) | employer | - |---------------+---------+-------------------+----------| - | False | Pyrami | False | Pyrami | - | False | Netagy | False | Netagy | - | False | Quility | False | Quility | - | True | | True | null | - +---------------+---------+-------------------+----------+ - - -ISEMPTY -------- - -Description ->>>>>>>>>>> - -Usage: isempty(field) returns true if the field is null or is an empty string. - -Argument type: all the supported data types. - -Return type: BOOLEAN - -Example:: - - os> source=accounts | eval temp = ifnull(employer, ' ') | eval `isempty(employer)` = isempty(employer), `isempty(temp)` = isempty(temp) | fields `isempty(temp)`, temp, `isempty(employer)`, employer - fetched rows / total rows = 4/4 - +---------------+---------+-------------------+----------+ - | isempty(temp) | temp | isempty(employer) | employer | - |---------------+---------+-------------------+----------| - | False | Pyrami | False | Pyrami | - | False | Netagy | False | Netagy | - | False | Quility | False | Quility | - | False | | True | null | - +---------------+---------+-------------------+----------+ - -EARLIEST --------- - -Description ->>>>>>>>>>> - -Usage: earliest(relative_string, field) returns true if the value of field is after the timestamp derived from relative_string relative to the current time. Otherwise, returns false. - -relative_string: -The relative string can be one of the following formats: - -1. `"now"` or `"now()"`: - Uses the current system time. - -2. Absolute format (`MM/dd/yyyy:HH:mm:ss` or `yyyy-MM-dd HH:mm:ss`): - Converts the string to a timestamp and compares it with the data. - -3. Relative format: `(+|-)[+<...>]@` - Steps to specify a relative time: - - - **a. Time offset:** Indicate the offset from the current time using `+` or `-`. - - **b. Time amount:** Provide a numeric value followed by a time unit (`s`, `m`, `h`, `d`, `w`, `M`, `y`). - - **c. Snap to unit:** Optionally specify a snap unit with `@` to round the result down to the nearest unit (e.g., hour, day, month). 
- - **Examples** (assuming current time is `2025-05-28 14:28:34`): - - - `-3d+2y` → `2027-05-25 14:28:34` - - `+1d@m` → `2025-05-29 14:28:00` - - `-3M+1y@M` → `2026-02-01 00:00:00` - -Read more details `here `_ - -Argument type: relative_string:STRING, field: TIMESTAMP - -Return type: BOOLEAN - -Example:: - - os> source=accounts | eval now = utc_timestamp() | eval a = earliest("now", now), b = earliest("-2d@d", now) | fields a, b | head 1 - fetched rows / total rows = 1/1 - +-------+------+ - | a | b | - |-------+------| - | False | True | - +-------+------+ - - os> source=nyc_taxi | where earliest('07/01/2014:00:30:00', timestamp) | stats COUNT() as cnt - fetched rows / total rows = 1/1 - +-----+ - | cnt | - |-----| - | 972 | - +-----+ - -LATEST ------- - -Description ->>>>>>>>>>> - -Usage: latest(relative_string, field) returns true if the value of field is before the timestamp derived from relative_string relative to the current time. Otherwise, returns false. - -Argument type: relative_string:STRING, field: TIMESTAMP - -Return type: BOOLEAN - -Example:: - - os> source=accounts | eval now = utc_timestamp() | eval a = latest("now", now), b = latest("+2d@d", now) | fields a, b | head 1 - fetched rows / total rows = 1/1 - +------+------+ - | a | b | - |------+------| - | True | True | - +------+------+ - - os> source=nyc_taxi | where latest('07/21/2014:04:00:00', timestamp) | stats COUNT() as cnt - fetched rows / total rows = 1/1 - +-----+ - | cnt | - |-----| - | 969 | - +-----+ - -REGEXP_MATCH ------------ - -Description ->>>>>>>>>>> - -Usage: regexp_match(string, pattern) returns true if the regular expression pattern finds a match against any substring of the string value, otherwise returns false. - -The function uses Java regular expression syntax for the pattern. 
-
-Argument type: STRING, STRING
-
-Return type: BOOLEAN
-
-Example::
-
-    #os> source=logs | where regexp_match(message, 'ERROR|WARN|FATAL') | fields timestamp, message
-    fetched rows / total rows = 3/100
-    +---------------------+------------------------------------------+
-    | timestamp           | message                                  |
-    |---------------------+------------------------------------------|
-    | 2024-01-15 10:23:45 | ERROR: Connection timeout to database    |
-    | 2024-01-15 10:24:12 | WARN: High memory usage detected         |
-    | 2024-01-15 10:25:33 | FATAL: System crashed unexpectedly       |
-    +---------------------+------------------------------------------+
-
-    #os> source=users | where regexp_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') | fields name, email
-    fetched rows / total rows = 2/3
-    +-------+----------------------+
-    | name  | email                |
-    |-------+----------------------|
-    | John  | john@example.com     |
-    | Alice | alice@company.org    |
-    +-------+----------------------+
-
-    #os> source=network | where regexp_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regexp_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') | fields ip_address, status
-    fetched rows / total rows = 2/10
-    +---------------+--------+
-    | ip_address    | status |
-    |---------------+--------|
-    | 8.8.8.8       | active |
-    | 1.1.1.1       | active |
-    +---------------+--------+
-
-    #os> source=products | eval category = if(regexp_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regexp_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) | fields name, category
-    fetched rows / total rows = 4/4
-    +------------------------+----------+
-    | name                   | category |
-    |------------------------+----------|
-    | Dell Laptop XPS        | Computing|
-    | iPhone 15 Pro          | Mobile   |
-    | Wireless Mouse         | Other    |
-    | Desktop Computer Tower | Computing|
-    +------------------------+----------+
diff --git a/docs/user/ppl/functions/conversion.md b/docs/user/ppl/functions/conversion.md
new file mode 100644
index 00000000000..a33a93bbd69
--- /dev/null
+++ b/docs/user/ppl/functions/conversion.md
@@ -0,0 +1,272 @@
+# Type Conversion Functions
+
+## CAST
+
+### Description
+
+Usage: cast(expr as dataType) casts expr to dataType and returns a value of dataType. The following conversion rules are used:
+
+| Src/Target | STRING | NUMBER | BOOLEAN | TIMESTAMP | DATE | TIME | IP |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| STRING | | Note1 | Note1 | TIMESTAMP() | DATE() | TIME() | IP() |
+| NUMBER | Note1 | | v!=0 | N/A | N/A | N/A | N/A |
+| BOOLEAN | Note1 | v?1:0 | | N/A | N/A | N/A | N/A |
+| TIMESTAMP | Note1 | N/A | N/A | | DATE() | TIME() | N/A |
+| DATE | Note1 | N/A | N/A | N/A | | N/A | N/A |
+| TIME | Note1 | N/A | N/A | N/A | N/A | | N/A |
+| IP | Note2 | N/A | N/A | N/A | N/A | N/A | |
+
+Note1: the conversion follows the JDK specification.
+Note2: IP will be converted to its canonical representation. The canonical representation
+for IPv6 is described in [RFC 5952](https://datatracker.ietf.org/doc/html/rfc5952).
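+
+Cast to IP example. (This example is an illustrative sketch: it assumes `CAST(... as ip)` and chained casts behave as described in the conversion table above, and the IPv6 literal is arbitrary. Per Note2, the value is rendered in its RFC 5952 canonical form, so leading zeros are dropped and the longest run of zero groups is compressed.)
+
+```ppl ignore
+source=people
+| eval `cip` = CAST(CAST('2001:0DB8:0000:0000:0000:0000:0000:0001' as ip) as string)
+| fields `cip`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------+
+| cip         |
+|-------------|
+| 2001:db8::1 |
++-------------+
+```
+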
+Cast to string example
+
+```ppl
+source=people
+| eval `cbool` = CAST(true as string), `cint` = CAST(1 as string), `cdate` = CAST(CAST('2012-08-07' as date) as string)
+| fields `cbool`, `cint`, `cdate`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------+------+------------+
+| cbool | cint | cdate      |
+|-------+------+------------|
+| TRUE  | 1    | 2012-08-07 |
++-------+------+------------+
+```
+
+Cast to number example
+
+```ppl
+source=people
+| eval `cbool` = CAST(true as int), `cstring` = CAST('1' as int)
+| fields `cbool`, `cstring`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------+---------+
+| cbool | cstring |
+|-------+---------|
+| 1     | 1       |
++-------+---------+
+```
+
+Cast to date example
+
+```ppl
+source=people
+| eval `cdate` = CAST('2012-08-07' as date), `ctime` = CAST('01:01:01' as time), `ctimestamp` = CAST('2012-08-07 01:01:01' as timestamp)
+| fields `cdate`, `ctime`, `ctimestamp`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------------+----------+---------------------+
+| cdate      | ctime    | ctimestamp          |
+|------------+----------+---------------------|
+| 2012-08-07 | 01:01:01 | 2012-08-07 01:01:01 |
++------------+----------+---------------------+
+```
+
+Cast function can be chained
+
+```ppl
+source=people
+| eval `cbool` = CAST(CAST(true as string) as boolean)
+| fields `cbool`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------+
+| cbool |
+|-------|
+| True  |
++-------+
+```
+
+## IMPLICIT (AUTO) TYPE CONVERSION
+
+Implicit conversion is automatic casting. When a function does not have an exact match for the
+input types, the engine looks for another signature that can safely work with the values. It picks
+the option that requires the least stretching of the original types, so you can mix literals and
+fields without adding `CAST` everywhere.
+
+### String to numeric
+
+When a string stands in for a number, the text is simply parsed:
+- The value must be something like `"3.14"` or `"42"`. Anything else causes the query to fail.
+- If a string appears next to numeric arguments, it is treated as a `DOUBLE` so the numeric
+  overload of the function can run.
+
+Use string in arithmetic operator example
+
+```ppl
+source=people
+| eval divide="5"/10, multiply="5" * 10, add="5" + 10, minus="5" - 10, concat="5" + "5"
+| fields divide, multiply, add, minus, concat
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------+----------+------+-------+--------+
+| divide | multiply | add  | minus | concat |
+|--------+----------+------+-------+--------|
+| 0.5    | 50.0     | 15.0 | -5.0  | 55     |
++--------+----------+------+-------+--------+
+```
+
+Use string in comparison operator example
+
+```ppl
+source=people
+| eval e="1000"==1000, en="1000"!=1000, ed="1000"==1000.0, edn="1000"!=1000.0, l="1000">999, ld="1000">999.9, i="malformed"==1000
+| fields e, en, ed, edn, l, ld, i
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------+-------+------+-------+------+------+------+
+| e    | en    | ed   | edn   | l    | ld   | i    |
+|------+-------+------+-------+------+------+------|
+| True | False | True | False | True | True | null |
++------+-------+------+-------+------+------+------+
+```
+
+## TOSTRING
+
+### Description
+
+The following usage options are available, depending on the parameter types and the number of parameters.
+Usage with format type: tostring(ANY, [format]) converts the value of the first argument to a string in the format named by the second argument. If the second argument is not provided, the value is converted to its default string representation.
+Return type: string
+Usage for a boolean parameter without a format type: tostring(boolean) converts the boolean value to 'TRUE' or 'FALSE'.
+Return type: string
+You can use this function with the eval command and as part of eval expressions. The first argument can be any valid type. The second argument is optional; if provided, it must be a format name, and it applies only when the first argument contains only numbers. If the first argument is boolean, the second argument is ignored even if it is provided.
+The format argument is optional and is only used when the value argument is a number. The tostring function supports the following format types:
+1. "binary" Converts a number to its binary representation.
+2. "hex" Converts a number to its hexadecimal representation.
+3. "commas" Formats the number with commas. If the number includes a decimal, the function rounds it to two decimal places.
+4. "duration" Converts a value in seconds to the readable time format HH:MM:SS.
+5. "duration_millis" Converts a value in milliseconds to the readable time format HH:MM:SS.
+
+Basic examples:
+You can use this function to convert a number to a string of its binary representation.
+Example
+
+```ppl
+source=accounts
+| where firstname = "Amber"
+| eval balance_binary = tostring(balance, "binary")
+| fields firstname, balance_binary, balance
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------+------------------+---------+
+| firstname | balance_binary   | balance |
+|-----------+------------------+---------|
+| Amber     | 1001100100111001 | 39225   |
++-----------+------------------+---------+
+```
+
+You can use this function to convert a number to a string of its hexadecimal representation.
+Example
+
+```ppl
+source=accounts
+| where firstname = "Amber"
+| eval balance_hex = tostring(balance, "hex")
+| fields firstname, balance_hex, balance
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------+-------------+---------+
+| firstname | balance_hex | balance |
+|-----------+-------------+---------|
+| Amber     | 9939        | 39225   |
++-----------+-------------+---------+
+```
+
+The following example formats the balance field to display values with commas.
+Example
+
+```ppl
+source=accounts
+| where firstname = "Amber"
+| eval balance_commas = tostring(balance, "commas")
+| fields firstname, balance_commas, balance
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------+----------------+---------+
+| firstname | balance_commas | balance |
+|-----------+----------------+---------|
+| Amber     | 39,225         | 39225   |
++-----------+----------------+---------+
+```
+
+The following example converts a number of seconds to the HH:MM:SS format representing hours, minutes, and seconds.
+Example
+
+```ppl
+source=accounts
+| where firstname = "Amber"
+| eval duration = tostring(6500, "duration")
+| fields firstname, duration
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------+----------+
+| firstname | duration |
+|-----------+----------|
+| Amber     | 01:48:20 |
++-----------+----------+
+```
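+
+The following example converts a number of milliseconds to the same HH:MM:SS format. (This example is a sketch based on the "duration_millis" format described above: 6500000 milliseconds is 6500 seconds, so the expected result mirrors the "duration" example.)
+Example
+
+```ppl ignore
+source=accounts
+| where firstname = "Amber"
+| eval duration_ms = tostring(6500000, "duration_millis")
+| fields firstname, duration_ms
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------+-------------+
+| firstname | duration_ms |
+|-----------+-------------|
+| Amber     | 01:48:20    |
++-----------+-------------+
+```
+
+The following example converts a boolean parameter to a string.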
+Example + +```ppl +source=accounts +| where firstname = "Amber" +| eval `boolean_str` = tostring(1=1) +| fields `boolean_str` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------+ +| boolean_str | +|-------------| +| TRUE | ++-------------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/cryptographic.md b/docs/user/ppl/functions/cryptographic.md new file mode 100644 index 00000000000..a46fb85ff6f --- /dev/null +++ b/docs/user/ppl/functions/cryptographic.md @@ -0,0 +1,101 @@ +# PPL Cryptographic Functions + +## MD5 + +### Description + +Version: 3.1.0 +Usage: `md5(str)` calculates the MD5 digest and returns the value as a 32 character hex string. +Argument type: STRING +Return type: STRING +Example + +```ppl +source=people +| eval `MD5('hello')` = MD5('hello') +| fields `MD5('hello')` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------------------+ +| MD5('hello') | +|----------------------------------| +| 5d41402abc4b2a76b9719d911017c592 | ++----------------------------------+ +``` + +## SHA1 + +### Description + +Version: 3.1.0 +Usage: `sha1(str)` returns the hex string result of SHA-1. +Argument type: STRING +Return type: STRING +Example + +```ppl +source=people +| eval `SHA1('hello')` = SHA1('hello') +| fields `SHA1('hello')` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------------------------------------------+ +| SHA1('hello') | +|------------------------------------------| +| aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d | ++------------------------------------------+ +``` + +## SHA2 + +### Description + +Version: 3.1.0 +Usage: `sha2(str, numBits)` returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). +The numBits indicates the desired bit length of the result, which must have a value of 224, 256, 384, or 512. +Argument type: STRING, INTEGER +Return type: STRING +Example + +```ppl +source=people +| eval `SHA2('hello',256)` = SHA2('hello',256) +| fields `SHA2('hello',256)` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------------------------------------------------------------------+ +| SHA2('hello',256) | +|------------------------------------------------------------------| +| 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 | ++------------------------------------------------------------------+ +``` + +```ppl +source=people +| eval `SHA2('hello',512)` = SHA2('hello',512) +| fields `SHA2('hello',512)` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------------------------------------------------------------------------------------------------------------------+ +| SHA2('hello',512) | +|----------------------------------------------------------------------------------------------------------------------------------| +| 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 | ++----------------------------------------------------------------------------------------------------------------------------------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/cryptographic.rst b/docs/user/ppl/functions/cryptographic.rst deleted file mode 100644 index c31121c3014..00000000000 --- a/docs/user/ppl/functions/cryptographic.rst +++ /dev/null @@ -1,90 +0,0 @@ -=========================== -PPL Cryptographic Functions -=========================== - -.. 
rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -MD5 ---- - -Description ->>>>>>>>>>> - -Version: 3.1.0 - -Usage: ``md5(str)`` calculates the MD5 digest and returns the value as a 32 character hex string. - -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `MD5('hello')` = MD5('hello') | fields `MD5('hello')` - fetched rows / total rows = 1/1 - +----------------------------------+ - | MD5('hello') | - |----------------------------------| - | 5d41402abc4b2a76b9719d911017c592 | - +----------------------------------+ - -SHA1 ----- - -Description ->>>>>>>>>>> - -Version: 3.1.0 - -Usage: ``sha1(str)`` returns the hex string result of SHA-1. - -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `SHA1('hello')` = SHA1('hello') | fields `SHA1('hello')` - fetched rows / total rows = 1/1 - +------------------------------------------+ - | SHA1('hello') | - |------------------------------------------| - | aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d | - +------------------------------------------+ - -SHA2 ----- - -Description ->>>>>>>>>>> - -Version: 3.1.0 - -Usage: ``sha2(str, numBits)`` returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). -The numBits indicates the desired bit length of the result, which must have a value of 224, 256, 384, or 512. - -Argument type: STRING, INTEGER - -Return type: STRING - -Example:: - - os> source=people | eval `SHA2('hello',256)` = SHA2('hello',256) | fields `SHA2('hello',256)` - fetched rows / total rows = 1/1 - +------------------------------------------------------------------+ - | SHA2('hello',256) | - |------------------------------------------------------------------| - | 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 | - +------------------------------------------------------------------+ - - os> source=people | eval `SHA2('hello',512)` = SHA2('hello',512) | fields `SHA2('hello',512)` - fetched rows / total rows = 1/1 - +----------------------------------------------------------------------------------------------------------------------------------+ - | SHA2('hello',512) | - |----------------------------------------------------------------------------------------------------------------------------------| - | 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 | - +----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/user/ppl/functions/datetime.md b/docs/user/ppl/functions/datetime.md new file mode 100644 index 00000000000..d8c72389d18 --- /dev/null +++ b/docs/user/ppl/functions/datetime.md @@ -0,0 +1,2782 @@ +# Date and Time Functions + + All PPL date and time functions use the UTC time zone. Both input and output values are interpreted as UTC. + For instance, an input timestamp literal like '2020-08-26 01:01:01' is assumed to be in UTC, and the now() + function also returns the current date and time in UTC. +## ADDDATE + +### Description + +Usage: adddate(date, INTERVAL expr unit) / adddate(date, days) adds the interval of second argument to date; adddate(date, days) adds the second argument as integer number of days to date. +If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. 
+Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG +Return type map: +(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP +(DATE, LONG) -> DATE +(TIMESTAMP/TIME, LONG) -> TIMESTAMP +Synonyms: [DATE_ADD](#date_add) when invoked with the INTERVAL form of the second argument. +Antonyms: [SUBDATE](#subdate) +Example + +```ppl +source=people +| eval `'2020-08-26' + 1h` = ADDDATE(DATE('2020-08-26'), INTERVAL 1 HOUR), `'2020-08-26' + 1` = ADDDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' + 1` = ADDDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) +| fields `'2020-08-26' + 1h`, `'2020-08-26' + 1`, `ts '2020-08-26 01:01:01' + 1` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------+------------------+------------------------------+ +| '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | +|---------------------+------------------+------------------------------| +| 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | ++---------------------+------------------+------------------------------+ +``` + +## ADDTIME + +### Description + +Usage: addtime(expr1, expr2) adds expr2 to expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. +Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME +Return type map: +(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP +(TIME, DATE/TIMESTAMP/TIME) -> TIME +Antonyms: [SUBTIME](#subtime) +Example + +```ppl +source=people +| eval `'2008-12-12' + 0` = ADDTIME(DATE('2008-12-12'), DATE('2008-11-15')) +| fields `'2008-12-12' + 0` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------+ +| '2008-12-12' + 0 | +|---------------------| +| 2008-12-12 00:00:00 | ++---------------------+ +``` + +```ppl +source=people +| eval `'23:59:59' + 0` = ADDTIME(TIME('23:59:59'), DATE('2004-01-01')) +| fields `'23:59:59' + 0` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------+ +| '23:59:59' + 0 | +|----------------| +| 23:59:59 | ++----------------+ +``` + +```ppl +source=people +| eval `'2004-01-01' + '23:59:59'` = ADDTIME(DATE('2004-01-01'), TIME('23:59:59')) +| fields `'2004-01-01' + '23:59:59'` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------------+ +| '2004-01-01' + '23:59:59' | +|---------------------------| +| 2004-01-01 23:59:59 | ++---------------------------+ +``` + +```ppl +source=people +| eval `'10:20:30' + '00:05:42'` = ADDTIME(TIME('10:20:30'), TIME('00:05:42')) +| fields `'10:20:30' + '00:05:42'` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------------------+ +| '10:20:30' + '00:05:42' | +|-------------------------| +| 10:26:12 | ++-------------------------+ +``` + +```ppl +source=people +| eval `'2007-02-28 10:20:30' + '20:40:50'` = ADDTIME(TIMESTAMP('2007-02-28 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) +| fields `'2007-02-28 10:20:30' + '20:40:50'` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------------------------------------+ +| '2007-02-28 10:20:30' + '20:40:50' | +|------------------------------------| +| 2007-03-01 07:01:20 | ++------------------------------------+ +``` + +## CONVERT_TZ + +### Description + +Usage: convert_tz(timestamp, from_timezone, to_timezone) constructs a local timestamp converted from the from_timezone to the to_timezone. CONVERT_TZ returns null when any of the three function arguments are invalid, i.e. 
timestamp is not in the format yyyy-MM-dd HH:mm:ss or the timezone is not in (+/-)HH:mm. The function also returns null for invalid dates, such as February 30th, and for timezones outside the range -13:59 to +14:00.
+Argument type: TIMESTAMP/STRING, STRING, STRING
+Return type: TIMESTAMP
+Conversion from the +00:00 timezone to the +10:00 timezone returns the timestamp argument converted from +00:00 to +10:00.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+10:00')
+| fields `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-05-15 12:00:00','+00:00','+10:00') |
+|-----------------------------------------------------|
+| 2008-05-15 22:00:00                                 |
++-----------------------------------------------------+
+```
+
+The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +15:00 in this example, will return null.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+15:00')
+| fields `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-05-15 12:00:00','+00:00','+15:00') |
+|-----------------------------------------------------|
+| null                                                |
++-----------------------------------------------------+
+```
+
+Conversion from a positive timezone to a negative timezone that crosses the date line.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` = convert_tz('2008-05-15 12:00:00','+03:30','-10:00')
+| fields `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-05-15 12:00:00','+03:30','-10:00') |
+|-----------------------------------------------------|
+| 2008-05-14 22:30:00                                 |
++-----------------------------------------------------+
+```
+
+Valid dates are required in convert_tz; invalid dates such as April 31st (not a date in the Gregorian calendar) will result in null.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` = convert_tz('2008-04-31 12:00:00','+03:30','-10:00')
+| fields `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-04-31 12:00:00','+03:30','-10:00') |
+|-----------------------------------------------------|
+| null                                                |
++-----------------------------------------------------+
+```
+
+Valid dates are required in convert_tz; invalid dates such as February 30th (not a date in the Gregorian calendar) will result in null.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-30 12:00:00','+03:30','-10:00')
+| fields `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-02-30 12:00:00','+03:30','-10:00') |
+|-----------------------------------------------------|
+| null                                                |
++-----------------------------------------------------+
+```
+
+February 29th 2008 is a valid date because it is a leap year.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-29 12:00:00','+03:30','-10:00')
+| fields `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-02-29 12:00:00','+03:30','-10:00') |
+|-----------------------------------------------------|
+| 2008-02-28 22:30:00                                 |
++-----------------------------------------------------+
+```
+
+Valid dates are required in convert_tz; invalid dates such as February 29th 2007 (2007 is not a leap year) will result in null.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2007-02-29 12:00:00','+03:30','-10:00')
+| fields `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2007-02-29 12:00:00','+03:30','-10:00') |
+|-----------------------------------------------------|
+| null                                                |
++-----------------------------------------------------+
+```
+
+The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:01 in this example, will return null.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:01','+00:00')
+| fields `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-02-01 12:00:00','+14:01','+00:00') |
+|-----------------------------------------------------|
+| null                                                |
++-----------------------------------------------------+
+```
+
+The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones at the inclusive upper bound, such as +14:00 in this example, will return a correctly converted datetime object.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:00','+00:00')
+| fields `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-02-01 12:00:00','+14:00','+00:00') |
+|-----------------------------------------------------|
+| 2008-01-31 22:00:00                                 |
++-----------------------------------------------------+
+```
+
+The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as -14:00, will result in null.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','-14:00','+00:00')
+| fields `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-02-01 12:00:00','-14:00','+00:00') |
+|-----------------------------------------------------|
+| null                                                |
++-----------------------------------------------------+
+```
+
+The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. This timezone is within the range, so it is valid and the time will be converted.
+Example
+
+```ppl
+source=people
+| eval `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` = convert_tz('2008-02-01 12:00:00','-13:59','+00:00')
+| fields `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------------------------+
+| convert_tz('2008-02-01 12:00:00','-13:59','+00:00') |
+|-----------------------------------------------------|
+| 2008-02-02 01:59:00                                 |
++-----------------------------------------------------+
+```
+
+## CURDATE
+
+### Description
+
+Returns the current date as a value in 'YYYY-MM-DD' format.
+CURDATE() returns the current date in UTC at the time the statement is executed.
+Return type: DATE
+Specification: CURDATE() -> DATE
+Example
+
+```ppl ignore
+source=people
+| eval `CURDATE()` = CURDATE()
+| fields `CURDATE()`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------------+
+| CURDATE()  |
+|------------|
+| 2025-08-02 |
++------------+
+```
+
+## CURRENT_DATE
+
+### Description
+
+`CURRENT_DATE()` is a synonym for [CURDATE()](#curdate).
+Example
+
+```ppl ignore
+source=people
+| eval `CURRENT_DATE()` = CURRENT_DATE()
+| fields `CURRENT_DATE()`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------------------+
+| CURRENT_DATE()   |
+|------------------|
+| 2025-08-02       |
++------------------+
+```
+
+## CURRENT_TIME
+
+### Description
+
+`CURRENT_TIME()` is a synonym for [CURTIME()](#curtime).
+Example
+
+```ppl ignore
+source=people
+| eval `CURRENT_TIME()` = CURRENT_TIME()
+| fields `CURRENT_TIME()`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------------------+
+| CURRENT_TIME()   |
+|------------------|
+| 15:39:05         |
++------------------+
+```
+
+## CURRENT_TIMESTAMP
+
+### Description
+
+`CURRENT_TIMESTAMP()` is a synonym for [NOW()](#now).
+Example
+
+```ppl ignore
+source=people
+| eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP()
+| fields `CURRENT_TIMESTAMP()`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------+
+| CURRENT_TIMESTAMP()   |
+|-----------------------|
+| 2025-08-02 15:54:19   |
++-----------------------+
+```
+
+## CURTIME
+
+### Description
+
+Returns the current time as a value in 'hh:mm:ss' format in the UTC time zone.
+CURTIME() returns the time at which the statement began to execute, as [NOW()](#now) does.
+Return type: TIME
+Specification: CURTIME() -> TIME
+Example
+
+```ppl ignore
+source=people
+| eval `value_1` = CURTIME(), `value_2` = CURTIME()
+| fields `value_1`, `value_2`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------+----------+
+| value_1  | value_2  |
+|----------+----------|
+| 15:39:05 | 15:39:05 |
++----------+----------+
+```
+
+## DATE
+
+### Description
+
+Usage: date(expr) constructs a date type with the input string expr as a date. If the argument is of type DATE/TIMESTAMP, it extracts the date part from the expression.
+Argument type: STRING/DATE/TIMESTAMP
+Return type: DATE
+Example
+
+```ppl
+source=people
+| eval `DATE('2020-08-26')` = DATE('2020-08-26')
+| fields `DATE('2020-08-26')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------------------+
+| DATE('2020-08-26') |
+|--------------------|
+| 2020-08-26         |
++--------------------+
+```
+
+```ppl
+source=people
+| eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00'))
+| fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------------------------------------+
+| DATE(TIMESTAMP('2020-08-26 13:49:00')) |
+|----------------------------------------|
+| 2020-08-26                             |
++----------------------------------------+
+```
+
+```ppl
+source=people
+| eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49')
+| fields `DATE('2020-08-26 13:49')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------------------------+
+| DATE('2020-08-26 13:49') |
+|--------------------------|
+| 2020-08-26               |
++--------------------------+
+```
+
+## DATE_ADD
+
+### Description
+
+Usage: date_add(date, INTERVAL expr unit) adds the interval expr to date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used.
+Argument type: DATE/TIMESTAMP/TIME, INTERVAL
+Return type: TIMESTAMP
+Synonyms: [ADDDATE](#adddate)
+Antonyms: [DATE_SUB](#date_sub)
+Example
+
+```ppl
+source=people
+| eval `'2020-08-26' + 1h` = DATE_ADD(DATE('2020-08-26'), INTERVAL 1 HOUR), `ts '2020-08-26 01:01:01' + 1d` = DATE_ADD(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 DAY)
+| fields `'2020-08-26' + 1h`, `ts '2020-08-26 01:01:01' + 1d`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------+-------------------------------+
+| '2020-08-26' + 1h   | ts '2020-08-26 01:01:01' + 1d |
+|---------------------+-------------------------------|
+| 2020-08-26 01:00:00 | 2020-08-27 01:01:01           |
++---------------------+-------------------------------+
+```
+
+## DATE_FORMAT
+
+### Description
+
+Usage: date_format(date, format) formats the date argument using the specifiers in the format argument.
+If an argument of type TIME is provided, the local date is used.
+The following table describes the available specifier arguments.
+
+
+| Specifier | Description |
+| --- | --- |
+| %a | Abbreviated weekday name (Sun..Sat) |
+| %b | Abbreviated month name (Jan..Dec) |
+| %c | Month, numeric (0..12) |
+| %D | Day of the month with English suffix (0th, 1st, 2nd, 3rd, ...) |
+| %d | Day of the month, numeric (00..31) |
+| %e | Day of the month, numeric (0..31) |
+| %f | Microseconds (000000..999999) |
+| %H | Hour (00..23) |
+| %h | Hour (01..12) |
+| %I | Hour (01..12) |
+| %i | Minutes, numeric (00..59) |
+| %j | Day of year (001..366) |
+| %k | Hour (0..23) |
+| %l | Hour (1..12) |
+| %M | Month name (January..December) |
+| %m | Month, numeric (00..12) |
+| %p | AM or PM |
+| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) |
+| %S | Seconds (00..59) |
+| %s | Seconds (00..59) |
+| %T | Time, 24-hour (hh:mm:ss) |
+| %U | Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 |
+| %u | Week (00..53), where Monday is the first day of the week; WEEK() mode 1 |
+| %V | Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with %X |
+| %v | Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with %x |
+| %W | Weekday name (Sunday..Saturday) |
+| %w | Day of the week (0=Sunday..6=Saturday) |
+| %X | Year for the week where Sunday is the first day of the week, numeric, four digits; used with %V |
+| %x | Year for the week, where Monday is the first day of the week, numeric, four digits; used with %v |
+| %Y | Year, numeric, four digits |
+| %y | Year, numeric (two digits) |
+| %% | A literal % character |
+| %x | x, for any “x” not listed above |
+| x | x, for any lowercase/uppercase letter except [aydmshiHIMYDSEL] |
+
+
+Argument type: STRING/DATE/TIME/TIMESTAMP, STRING
+Return type: STRING
+Example
+
+```ppl
+source=people
+| eval `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')` = DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')
+| fields `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------------------------------------------------+---------------------------------------------------------------------+
+| DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') |
+|----------------------------------------------------+---------------------------------------------------------------------|
+| 13:14:15.012345                                    | 1998-Jan-31st 01:14:15 PM                                            |
++----------------------------------------------------+---------------------------------------------------------------------+
+```
+
+## DATETIME
+
+### Description
+
+Usage: DATETIME(timestamp) / DATETIME(timestamp, to_timezone) converts the timestamp to the given timezone.
+Argument type: TIMESTAMP/STRING
+Return type map:
+(TIMESTAMP, STRING) -> TIMESTAMP
+(TIMESTAMP) -> TIMESTAMP
+Converting a timestamp with a timezone to the timezone given by the second argument.
+Example
+
+```ppl
+source=people
+| eval `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` = DATETIME('2004-02-28 23:00:00-10:00', '+10:00')
+| fields `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------------------------------------+
+| DATETIME('2004-02-28 23:00:00-10:00', '+10:00') |
+|-------------------------------------------------|
+| 2004-02-29 19:00:00                             |
++-------------------------------------------------+
+```
+
+The valid timezone range for datetime is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null.
+Example
+
+```ppl
+source=people
+| eval `DATETIME('2008-01-01 02:00:00', '-14:00')` = DATETIME('2008-01-01 02:00:00', '-14:00')
+| fields `DATETIME('2008-01-01 02:00:00', '-14:00')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------------------------------+
+| DATETIME('2008-01-01 02:00:00', '-14:00') |
+|-------------------------------------------|
+| null                                      |
++-------------------------------------------+
+```
+
+## DATE_SUB
+
+### Description
+
+Usage: date_sub(date, INTERVAL expr unit) subtracts the interval expr from date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used.
+Argument type: DATE/TIMESTAMP/TIME, INTERVAL
+Return type: TIMESTAMP
+Synonyms: [SUBDATE](#subdate)
+Antonyms: [DATE_ADD](#date_add)
+Example
+
+```ppl
+source=people
+| eval `'2008-01-02' - 31d` = DATE_SUB(DATE('2008-01-02'), INTERVAL 31 DAY), `ts '2020-08-26 01:01:01' - 1h` = DATE_SUB(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 HOUR)
+| fields `'2008-01-02' - 31d`, `ts '2020-08-26 01:01:01' - 1h`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------+-------------------------------+
+| '2008-01-02' - 31d  | ts '2020-08-26 01:01:01' - 1h |
+|---------------------+-------------------------------|
+| 2007-12-02 00:00:00 | 2020-08-26 00:01:01           |
++---------------------+-------------------------------+
+```
+
+## DATEDIFF
+
+### Description
+
+Usage: Calculates the difference of date parts of given values. If the first argument is time, today's date is used.
+Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME
+Return type: LONG
+Example
+
+```ppl
+source=people
+| eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')), `today - today` = DATEDIFF(TIME('23:59:59'), TIME('00:00:00'))
+| fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`, `today - today`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------+-----------------------------+---------------+
+| '2000-01-02' - '2000-01-01' | '2001-02-01' - '2004-01-01' | today - today |
+|-----------------------------+-----------------------------+---------------|
+| 1                           | -1064                       | 0             |
++-----------------------------+-----------------------------+---------------+
+```
+
+## DAY
+
+### Description
+
+Usage: day(date) extracts the day of the month for date, in the range 1 to 31.
+Argument type: STRING/DATE/TIMESTAMP
+Return type: INTEGER
+Synonyms: [DAYOFMONTH](#dayofmonth), [DAY_OF_MONTH](#day_of_month)
+Example
+
+```ppl
+source=people
+| eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26'))
+| fields `DAY(DATE('2020-08-26'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------------+
+| DAY(DATE('2020-08-26')) |
+|-------------------------|
+| 26                      |
++-------------------------+
+```
+
+## DAYNAME
+
+### Description
+
+Usage: dayname(date) returns the name of the weekday for date, including Monday, Tuesday, Wednesday, Thursday, Friday, Saturday and Sunday.
+Argument type: STRING/DATE/TIMESTAMP +Return type: STRING +Example + +```ppl +source=people +| eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) +| fields `DAYNAME(DATE('2020-08-26'))` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------------------------+ +| DAYNAME(DATE('2020-08-26')) | +|-----------------------------| +| Wednesday | ++-----------------------------+ +``` + +## DAYOFMONTH + +### Description + +Usage: dayofmonth(date) extracts the day of the month for date, in the range 1 to 31. +Argument type: STRING/DATE/TIMESTAMP +Return type: INTEGER +Synonyms: [DAY](#day), [DAY_OF_MONTH](#day_of_month) +Example + +```ppl +source=people +| eval `DAYOFMONTH(DATE('2020-08-26'))` = DAYOFMONTH(DATE('2020-08-26')) +| fields `DAYOFMONTH(DATE('2020-08-26'))` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------------+ +| DAYOFMONTH(DATE('2020-08-26')) | +|--------------------------------| +| 26 | ++--------------------------------+ +``` + +## DAY_OF_MONTH + +### Description + +Usage: day_of_month(date) extracts the day of the month for date, in the range 1 to 31. +Argument type: STRING/DATE/TIMESTAMP +Return type: INTEGER +Synonyms: [DAY](#day), [DAYOFMONTH](#dayofmonth) +Example + +```ppl +source=people +| eval `DAY_OF_MONTH(DATE('2020-08-26'))` = DAY_OF_MONTH(DATE('2020-08-26')) +| fields `DAY_OF_MONTH(DATE('2020-08-26'))` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------------------+ +| DAY_OF_MONTH(DATE('2020-08-26')) | +|----------------------------------| +| 26 | ++----------------------------------+ +``` + +## DAYOFWEEK + +### Description + +Usage: dayofweek(date) returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). +Argument type: STRING/DATE/TIMESTAMP +Return type: INTEGER +Synonyms: [DAY_OF_WEEK](#day_of_week) +Example + +```ppl +source=people +| eval `DAYOFWEEK(DATE('2020-08-26'))` = DAYOFWEEK(DATE('2020-08-26')) +| fields `DAYOFWEEK(DATE('2020-08-26'))` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------------------------+ +| DAYOFWEEK(DATE('2020-08-26')) | +|-------------------------------| +| 4 | ++-------------------------------+ +``` + +## DAY_OF_WEEK + +### Description + +Usage: day_of_week(date) returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). +Argument type: STRING/DATE/TIMESTAMP +Return type: INTEGER +Synonyms: [DAYOFWEEK](#dayofweek) +Example + +```ppl +source=people +| eval `DAY_OF_WEEK(DATE('2020-08-26'))` = DAY_OF_WEEK(DATE('2020-08-26')) +| fields `DAY_OF_WEEK(DATE('2020-08-26'))` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++---------------------------------+ +| DAY_OF_WEEK(DATE('2020-08-26')) | +|---------------------------------| +| 4 | ++---------------------------------+ +``` + +## DAYOFYEAR + +### Description + +Usage: dayofyear(date) returns the day of the year for date, in the range 1 to 366. 
+Argument type: STRING/DATE/TIMESTAMP
+Return type: INTEGER
+Synonyms: [DAY_OF_YEAR](#day_of_year)
+Example
+
+```ppl
+source=people
+| eval `DAYOFYEAR(DATE('2020-08-26'))` = DAYOFYEAR(DATE('2020-08-26'))
+| fields `DAYOFYEAR(DATE('2020-08-26'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------------------+
+| DAYOFYEAR(DATE('2020-08-26')) |
+|-------------------------------|
+| 239                           |
++-------------------------------+
+```
+
+## DAY_OF_YEAR
+
+### Description
+
+Usage: day_of_year(date) returns the day of the year for date, in the range 1 to 366.
+Argument type: STRING/DATE/TIMESTAMP
+Return type: INTEGER
+Synonyms: [DAYOFYEAR](#dayofyear)
+Example
+
+```ppl
+source=people
+| eval `DAY_OF_YEAR(DATE('2020-08-26'))` = DAY_OF_YEAR(DATE('2020-08-26'))
+| fields `DAY_OF_YEAR(DATE('2020-08-26'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------------------+
+| DAY_OF_YEAR(DATE('2020-08-26')) |
+|---------------------------------|
+| 239                             |
++---------------------------------+
+```
+
+## EXTRACT
+
+### Description
+
+Usage: extract(part FROM date) returns a LONG with digits in order according to the given 'part' arguments.
+The specific format of the returned long is determined by the table below.
+Argument type: PART, where PART is one of the following tokens in the table below.
+The format specifiers found in this table are the same as those found in the [DATE_FORMAT](#date_format) function.
+The following table describes the mapping of a 'part' to a particular format.
+
+
+| Part | Format |
+| --- | --- |
+| MICROSECOND | %f |
+| SECOND | %s |
+| MINUTE | %i |
+| HOUR | %H |
+| DAY | %d |
+| WEEK | %V |
+| MONTH | %m |
+| YEAR | %Y |
+| SECOND_MICROSECOND | %s%f |
+| MINUTE_MICROSECOND | %i%s%f |
+| MINUTE_SECOND | %i%s |
+| HOUR_MICROSECOND | %H%i%s%f |
+| HOUR_SECOND | %H%i%s |
+| HOUR_MINUTE | %H%i |
+| DAY_MICROSECOND | %d%H%i%s%f |
+| DAY_SECOND | %d%H%i%s |
+| DAY_MINUTE | %d%H%i |
+| DAY_HOUR | %d%H |
+| YEAR_MONTH | %Y%m |
+
+
+Return type: LONG
+Example
+
+```ppl
+source=people
+| eval `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` = extract(YEAR_MONTH FROM "2023-02-07 10:11:12")
+| fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------------------------------------------------+
+| extract(YEAR_MONTH FROM "2023-02-07 10:11:12") |
+|------------------------------------------------|
+| 202302                                         |
++------------------------------------------------+
+```
+
+## FROM_DAYS
+
+### Description
+
+Usage: from_days(N) returns the date value given the day number N.
+Argument type: INTEGER/LONG
+Return type: DATE
+Example
+
+```ppl
+source=people
+| eval `FROM_DAYS(733687)` = FROM_DAYS(733687)
+| fields `FROM_DAYS(733687)`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------+
+| FROM_DAYS(733687) |
+|-------------------|
+| 2008-10-07        |
++-------------------+
+```
+
+## FROM_UNIXTIME
+
+### Description
+
+Usage: Returns a representation of the argument given as a timestamp or character string value. Performs the reverse conversion of the [UNIX_TIMESTAMP](#unix_timestamp) function.
+If the second argument is provided, it is used to format the result in the same way as the format string used for the [DATE_FORMAT](#date_format) function.
+If the timestamp is outside the range 1970-01-01 00:00:00 to 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 in epoch time), the function returns NULL.
+Argument type: DOUBLE, STRING
+Return type map:
+DOUBLE -> TIMESTAMP
+DOUBLE, STRING -> STRING
+Examples
+
+```ppl
+source=people
+| eval `FROM_UNIXTIME(1220249547)` = FROM_UNIXTIME(1220249547)
+| fields `FROM_UNIXTIME(1220249547)`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------------+
+| FROM_UNIXTIME(1220249547) |
+|---------------------------|
+| 2008-09-01 06:12:27       |
++---------------------------+
+```
+
+```ppl
+source=people
+| eval `FROM_UNIXTIME(1220249547, '%T')` = FROM_UNIXTIME(1220249547, '%T')
+| fields `FROM_UNIXTIME(1220249547, '%T')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------------------+
+| FROM_UNIXTIME(1220249547, '%T') |
+|---------------------------------|
+| 06:12:27                        |
++---------------------------------+
+```
+
+## GET_FORMAT
+
+### Description
+
+Usage: Returns a string value containing string format specifiers based on the input arguments.
+Argument type: TYPE, STRING, where TYPE must be one of the following tokens: [DATE, TIME, TIMESTAMP], and
+STRING must be one of the following tokens: ["USA", "JIS", "ISO", "EUR", "INTERNAL"] (" can be replaced by ').
+Examples
+
+```ppl
+source=people
+| eval `GET_FORMAT(DATE, 'USA')` = GET_FORMAT(DATE, 'USA')
+| fields `GET_FORMAT(DATE, 'USA')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------------+
+| GET_FORMAT(DATE, 'USA') |
+|-------------------------|
+| %m.%d.%Y                |
++-------------------------+
+```
+
+## HOUR
+
+### Description
+
+Usage: hour(time) extracts the hour value for time. Unlike a time-of-day value, a time value has a large range and can be greater than 23, so the return value of hour(time) can also be greater than 23.
+Argument type: STRING/TIME/TIMESTAMP
+Return type: INTEGER
+Synonyms: [HOUR_OF_DAY](#hour_of_day)
+Example
+
+```ppl
+source=people
+| eval `HOUR(TIME('01:02:03'))` = HOUR(TIME('01:02:03'))
+| fields `HOUR(TIME('01:02:03'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------------------------+
+| HOUR(TIME('01:02:03')) |
+|------------------------|
+| 1                      |
++------------------------+
+```
+
+## HOUR_OF_DAY
+
+### Description
+
+Usage: hour_of_day(time) extracts the hour value for time. Unlike a time-of-day value, a time value has a large range and can be greater than 23, so the return value of hour_of_day(time) can also be greater than 23.
+Argument type: STRING/TIME/TIMESTAMP
+Return type: INTEGER
+Synonyms: [HOUR](#hour)
+Example
+
+```ppl
+source=people
+| eval `HOUR_OF_DAY(TIME('01:02:03'))` = HOUR_OF_DAY(TIME('01:02:03'))
+| fields `HOUR_OF_DAY(TIME('01:02:03'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------------------+
+| HOUR_OF_DAY(TIME('01:02:03')) |
+|-------------------------------|
+| 1                             |
++-------------------------------+
+```
+
+## LAST_DAY
+
+### Description
+
+Usage: Returns the last day of the month as a DATE for a valid argument.
+Argument type: DATE/STRING/TIMESTAMP/TIME
+Return type: DATE
+Example
+
+```ppl
+source=people
+| eval `last_day('2023-02-06')` = last_day('2023-02-06')
+| fields `last_day('2023-02-06')`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++------------------------+
+| last_day('2023-02-06') |
+|------------------------|
+| 2023-02-28             |
++------------------------+
+```
+
+## LOCALTIMESTAMP
+
+### Description
+
+`LOCALTIMESTAMP()` is a synonym for [NOW()](#now).
+Example
+
+```ppl ignore
+source=people
+| eval `LOCALTIMESTAMP()` = LOCALTIMESTAMP()
+| fields `LOCALTIMESTAMP()`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------+
+| LOCALTIMESTAMP()    |
+|---------------------|
+| 2025-08-02 15:54:19 |
++---------------------+
+```
+
+## LOCALTIME
+
+### Description
+
+`LOCALTIME()` is a synonym for [NOW()](#now).
+Example
+
+```ppl ignore
+source=people
+| eval `LOCALTIME()` = LOCALTIME()
+| fields `LOCALTIME()`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------+
+| LOCALTIME()         |
+|---------------------|
+| 2025-08-02 15:54:19 |
++---------------------+
+```
+
+## MAKEDATE
+
+### Description
+
+Returns a date, given `year` and `day-of-year` values. `day-of-year` must be greater than 0 or the result is `NULL`. The result is also `NULL` if either argument is `NULL`.
+Arguments are rounded to an integer.
+Limitations:
+- Zero `year` is interpreted as 2000;
+- Negative `year` is not accepted;
+- `day-of-year` should be greater than zero;
+- `day-of-year` can be greater than 365/366, in which case the calculation switches to the next year(s) (see example).
+
+Specifications:
+1. MAKEDATE(DOUBLE, DOUBLE) -> DATE
+
+Argument type: DOUBLE
+Return type: DATE
+Example
+
+```ppl
+source=people
+| eval `MAKEDATE(1945, 5.9)` = MAKEDATE(1945, 5.9), `MAKEDATE(1984, 1984)` = MAKEDATE(1984, 1984)
+| fields `MAKEDATE(1945, 5.9)`, `MAKEDATE(1984, 1984)`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------+----------------------+
+| MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) |
+|---------------------+----------------------|
+| 1945-01-06          | 1989-06-06           |
++---------------------+----------------------+
+```
+
+## MAKETIME
+
+### Description
+
+Returns a time value calculated from the hour, minute, and second arguments. Returns `NULL` if any of its arguments are `NULL`.
+The third (seconds) argument can have a fractional part; the other arguments are rounded to an integer.
+Limitations:
+- 24-hour clock is used, available time range is [00:00:00.0 - 23:59:59.(9)];
+- Up to 9 digits of the second fraction part are taken (nanosecond precision).
+
+Specifications:
+1. MAKETIME(DOUBLE, DOUBLE, DOUBLE) -> TIME
+
+Argument type: DOUBLE
+Return type: TIME
+Example
+
+```ppl
+source=people
+| eval `MAKETIME(20, 30, 40)` = MAKETIME(20, 30, 40), `MAKETIME(20.2, 49.5, 42.100502)` = MAKETIME(20.2, 49.5, 42.100502)
+| fields `MAKETIME(20, 30, 40)`, `MAKETIME(20.2, 49.5, 42.100502)`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------------------+---------------------------------+
+| MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) |
+|----------------------+---------------------------------|
+| 20:30:40             | 20:50:42.100502                 |
++----------------------+---------------------------------+
+```
+
+## MICROSECOND
+
+### Description
+
+Usage: microsecond(expr) returns the microseconds from the time or timestamp expression expr as a number in the range from 0 to 999999.
+Argument type: STRING/TIME/TIMESTAMP
+Return type: INTEGER
+Example
+
+```ppl
+source=people
+| eval `MICROSECOND(TIME('01:02:03.123456'))` = MICROSECOND(TIME('01:02:03.123456'))
+| fields `MICROSECOND(TIME('01:02:03.123456'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------------------------------------+
+| MICROSECOND(TIME('01:02:03.123456')) |
+|--------------------------------------|
+| 123456                               |
++--------------------------------------+
+```
+
+## MINUTE
+
+### Description
+
+Usage: minute(time) returns the minute for time, in the range 0 to 59.
+Argument type: STRING/TIME/TIMESTAMP
+Return type: INTEGER
+Synonyms: [MINUTE_OF_HOUR](#minute_of_hour)
+Example
+
+```ppl
+source=people
+| eval `MINUTE(TIME('01:02:03'))` = MINUTE(TIME('01:02:03'))
+| fields `MINUTE(TIME('01:02:03'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++--------------------------+
+| MINUTE(TIME('01:02:03')) |
+|--------------------------|
+| 2                        |
++--------------------------+
+```
+
+## MINUTE_OF_DAY
+
+### Description
+
+Usage: minute_of_day(time) returns the number of minutes in the day, in the range 0 to 1439.
+Argument type: STRING/TIME/TIMESTAMP
+Return type: INTEGER
+Example
+
+```ppl
+source=people
+| eval `MINUTE_OF_DAY(TIME('01:02:03'))` = MINUTE_OF_DAY(TIME('01:02:03'))
+| fields `MINUTE_OF_DAY(TIME('01:02:03'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------------------+
+| MINUTE_OF_DAY(TIME('01:02:03')) |
+|---------------------------------|
+| 62                              |
++---------------------------------+
+```
+
+## MINUTE_OF_HOUR
+
+### Description
+
+Usage: minute_of_hour(time) returns the minute for time, in the range 0 to 59.
+Argument type: STRING/TIME/TIMESTAMP
+Return type: INTEGER
+Synonyms: [MINUTE](#minute)
+Example
+
+```ppl
+source=people
+| eval `MINUTE_OF_HOUR(TIME('01:02:03'))` = MINUTE_OF_HOUR(TIME('01:02:03'))
+| fields `MINUTE_OF_HOUR(TIME('01:02:03'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------------------------------+
+| MINUTE_OF_HOUR(TIME('01:02:03')) |
+|----------------------------------|
+| 2                                |
++----------------------------------+
+```
+
+## MONTH
+
+### Description
+
+Usage: month(date) returns the month for date, in the range 1 to 12 for January to December.
+Argument type: STRING/DATE/TIMESTAMP
+Return type: INTEGER
+Synonyms: [MONTH_OF_YEAR](#month_of_year)
+Example
+
+```ppl
+source=people
+| eval `MONTH(DATE('2020-08-26'))` = MONTH(DATE('2020-08-26'))
+| fields `MONTH(DATE('2020-08-26'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------------+
+| MONTH(DATE('2020-08-26')) |
+|---------------------------|
+| 8                         |
++---------------------------+
+```
+
+## MONTH_OF_YEAR
+
+### Description
+
+Usage: month_of_year(date) returns the month for date, in the range 1 to 12 for January to December.
+Argument type: STRING/DATE/TIMESTAMP
+Return type: INTEGER
+Synonyms: [MONTH](#month)
+Example
+
+```ppl
+source=people
+| eval `MONTH_OF_YEAR(DATE('2020-08-26'))` = MONTH_OF_YEAR(DATE('2020-08-26'))
+| fields `MONTH_OF_YEAR(DATE('2020-08-26'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------------+
+| MONTH_OF_YEAR(DATE('2020-08-26')) |
+|-----------------------------------|
+| 8                                 |
++-----------------------------------+
+```
+
+## MONTHNAME
+
+### Description
+
+Usage: monthname(date) returns the full name of the month for date.
+Argument type: STRING/DATE/TIMESTAMP
+Return type: STRING
+Example
+
+```ppl
+source=people
+| eval `MONTHNAME(DATE('2020-08-26'))` = MONTHNAME(DATE('2020-08-26'))
+| fields `MONTHNAME(DATE('2020-08-26'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-------------------------------+
+| MONTHNAME(DATE('2020-08-26')) |
+|-------------------------------|
+| August                        |
++-------------------------------+
+```
+
+## NOW
+
+### Description
+
+Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss' format. The value is expressed in the UTC time zone.
+`NOW()` returns a constant time that indicates the time at which the statement began to execute. This differs from the behavior for [SYSDATE()](#sysdate), which returns the exact time at which it executes.
+Return type: TIMESTAMP
+Specification: NOW() -> TIMESTAMP
+Example
+
+```ppl ignore
+source=people
+| eval `value_1` = NOW(), `value_2` = NOW()
+| fields `value_1`, `value_2`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++---------------------+---------------------+
+| value_1             | value_2             |
+|---------------------+---------------------|
+| 2025-08-02 15:39:05 | 2025-08-02 15:39:05 |
++---------------------+---------------------+
+```
+
+## PERIOD_ADD
+
+### Description
+
+Usage: period_add(P, N) adds N months to period P (in the format YYMM or YYYYMM). Returns a value in the format YYYYMM.
+Argument type: INTEGER, INTEGER
+Return type: INTEGER
+Example
+
+```ppl
+source=people
+| eval `PERIOD_ADD(200801, 2)` = PERIOD_ADD(200801, 2), `PERIOD_ADD(200801, -12)` = PERIOD_ADD(200801, -12)
+| fields `PERIOD_ADD(200801, 2)`, `PERIOD_ADD(200801, -12)`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------+-------------------------+
+| PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) |
+|-----------------------+-------------------------|
+| 200803                | 200701                  |
++-----------------------+-------------------------+
+```
+
+## PERIOD_DIFF
+
+### Description
+
+Usage: period_diff(P1, P2) returns the number of months between periods P1 and P2 given in the format YYMM or YYYYMM.
+Argument type: INTEGER, INTEGER
+Return type: INTEGER
+Example
+
+```ppl
+source=people
+| eval `PERIOD_DIFF(200802, 200703)` = PERIOD_DIFF(200802, 200703), `PERIOD_DIFF(200802, 201003)` = PERIOD_DIFF(200802, 201003)
+| fields `PERIOD_DIFF(200802, 200703)`, `PERIOD_DIFF(200802, 201003)`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------+-----------------------------+
+| PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) |
+|-----------------------------+-----------------------------|
+| 11                          | -25                         |
++-----------------------------+-----------------------------+
+```
+
+## QUARTER
+
+### Description
+
+Usage: quarter(date) returns the quarter of the year for date, in the range 1 to 4.
+Argument type: STRING/DATE/TIMESTAMP
+Return type: INTEGER
+Example
+
+```ppl
+source=people
+| eval `QUARTER(DATE('2020-08-26'))` = QUARTER(DATE('2020-08-26'))
+| fields `QUARTER(DATE('2020-08-26'))`
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------------------------+
+| QUARTER(DATE('2020-08-26')) |
+|-----------------------------|
+| 3                           |
++-----------------------------+
+```
+
+## SEC_TO_TIME
+
+### Description
+
+Usage: sec_to_time(number) returns the time in HH:mm:ss[.nnnnnn] format.
+Note that the function returns a time between 00:00:00 and 23:59:59.
+

## SECOND

### Description

Usage: second(time) returns the second for time, in the range 0 to 59.
Argument type: STRING/TIME/TIMESTAMP
Return type: INTEGER
Synonyms: [SECOND_OF_MINUTE](#second_of_minute)
Example

```ppl
source=people
| eval `SECOND(TIME('01:02:03'))` = SECOND(TIME('01:02:03'))
| fields `SECOND(TIME('01:02:03'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------+
| SECOND(TIME('01:02:03')) |
|--------------------------|
| 3                        |
+--------------------------+
```

## SECOND_OF_MINUTE

### Description

Usage: second_of_minute(time) returns the second for time, in the range 0 to 59.
Argument type: STRING/TIME/TIMESTAMP
Return type: INTEGER
Synonyms: [SECOND](#second)
Example

```ppl
source=people
| eval `SECOND_OF_MINUTE(TIME('01:02:03'))` = SECOND_OF_MINUTE(TIME('01:02:03'))
| fields `SECOND_OF_MINUTE(TIME('01:02:03'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------------------------+
| SECOND_OF_MINUTE(TIME('01:02:03')) |
|------------------------------------|
| 3                                  |
+------------------------------------+
```

## STRFTIME

**Version: 3.3.0**

### Description

Usage: strftime(time, format) takes a UNIX timestamp (in seconds) and renders it as a string using the specified format. For numeric inputs, the UNIX time must be in seconds. Values greater than 100000000000 are automatically treated as milliseconds and converted to seconds.
You can use time format variables with the strftime function. This function performs the reverse operation of [UNIX_TIMESTAMP](#unix_timestamp) and is similar to [FROM_UNIXTIME](#from_unixtime) but with POSIX-style format specifiers.

 - **Available only when Calcite engine is enabled**
 - All timestamps are interpreted as UTC timezone
 - Text formatting uses language-neutral Locale.ROOT (weekday and month names appear in abbreviated form)
 - String inputs are NOT supported - use `unix_timestamp()` to convert strings first
 - Functions that return date/time values (like `date()`, `now()`, `timestamp()`) are supported

Argument type: INTEGER/LONG/DOUBLE/TIMESTAMP, STRING
Return type: STRING
Format specifiers:
The following table describes the available specifier arguments.
+


| Specifier | Description |
| --- | --- |
| %a | Abbreviated weekday name (Mon..Sun) |
| %A | Weekday name (Mon..Sun) - Note: Locale.ROOT uses abbreviated form |
| %b | Abbreviated month name (Jan..Dec) |
| %B | Month name (Jan..Dec) - Note: Locale.ROOT uses abbreviated form |
| %c | Date and time (e.g., Mon Jul 18 09:30:00 2019) |
| %C | Century as 2-digit decimal number |
| %d | Day of the month, zero-padded (01..31) |
| %e | Day of the month, space-padded ( 1..31) |
| %Ez | Timezone offset in minutes from UTC (e.g., +0 for UTC, +330 for IST, -300 for EST) |
| %f | Microseconds as decimal number (000000..999999) |
| %F | ISO 8601 date format (%Y-%m-%d) |
| %g | ISO 8601 year without century (00..99) |
| %G | ISO 8601 year with century |
| %H | Hour (24-hour clock) (00..23) |
| %I | Hour (12-hour clock) (01..12) |
| %j | Day of year (001..366) |
| %k | Hour (24-hour clock), space-padded ( 0..23) |
| %m | Month as decimal number (01..12) |
| %M | Minute (00..59) |
| %N | Subsecond digits (default %9N = nanoseconds). Accepts any precision value from 1-9 (e.g., %3N = 3 digits, %5N = 5 digits, %9N = 9 digits). The precision directly controls the number of digits displayed |
| %p | AM or PM |
| %Q | Subsecond component (default milliseconds). Can specify precision: %3Q = milliseconds, %6Q = microseconds, %9Q = nanoseconds. Other precision values (e.g., %5Q) default to %3Q |
| %s | UNIX Epoch timestamp in seconds |
| %S | Second (00..59) |
| %T | Time in 24-hour notation (%H:%M:%S) |
| %U | Week of year starting from 0 (00..53) |
| %V | ISO week number (01..53) |
| %w | Weekday as decimal (0=Sunday..6=Saturday) |
| %x | Date in MM/dd/yyyy format (e.g., 07/13/2019) |
| %X | Time in HH:mm:ss format (e.g., 09:30:00) |
| %y | Year without century (00..99) |
| %Y | Year with century |
| %z | Timezone offset (+hhmm or -hhmm) |
| %:z | Timezone offset with colon (+hh:mm or -hh:mm) |
| %::z | Timezone offset with colons (+hh:mm:ss) |
| %:::z | Timezone offset hour only (+hh or -hh) |
| %Z | Timezone abbreviation (e.g., EST, PDT) |
| %% | Literal % character |


Examples

```ppl ignore
source=people | eval `strftime(1521467703, "%Y-%m-%dT%H:%M:%S")` = strftime(1521467703, "%Y-%m-%dT%H:%M:%S") | fields `strftime(1521467703, "%Y-%m-%dT%H:%M:%S")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------------------------+
| strftime(1521467703, "%Y-%m-%dT%H:%M:%S") |
|-------------------------------------------|
| 2018-03-19T13:55:03                       |
+-------------------------------------------+
```

```ppl ignore
source=people | eval `strftime(1521467703, "%F %T")` = strftime(1521467703, "%F %T") | fields `strftime(1521467703, "%F %T")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------------+
| strftime(1521467703, "%F %T") |
|-------------------------------|
| 2018-03-19 13:55:03           |
+-------------------------------+
```

```ppl ignore
source=people | eval `strftime(1521467703, "%a %b %d, %Y")` = strftime(1521467703, "%a %b %d, %Y") | fields `strftime(1521467703, "%a %b %d, %Y")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------------------+
| strftime(1521467703, "%a %b %d, %Y") |
|--------------------------------------|
| Mon Mar 19, 2018                     |
+--------------------------------------+
```

```ppl ignore
source=people | eval `strftime(1521467703, "%%Y")` = strftime(1521467703, "%%Y") | fields `strftime(1521467703, "%%Y")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------+
| strftime(1521467703, "%%Y") |
|-----------------------------|
| %Y                          |
+-----------------------------+
```

```ppl ignore
source=people | eval `strftime(date('2020-09-16'), "%Y-%m-%d")` = strftime(date('2020-09-16'), "%Y-%m-%d") | fields `strftime(date('2020-09-16'), "%Y-%m-%d")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------------------------------+
| strftime(date('2020-09-16'), "%Y-%m-%d") |
|------------------------------------------|
| 2020-09-16                               |
+------------------------------------------+
```

```ppl ignore
source=people | eval `strftime(timestamp('2020-09-16 14:30:00'), "%F %T")` = strftime(timestamp('2020-09-16 14:30:00'), "%F %T") | fields `strftime(timestamp('2020-09-16 14:30:00'), "%F %T")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------------------------------+
| strftime(timestamp('2020-09-16 14:30:00'), "%F %T") |
|-----------------------------------------------------|
| 2020-09-16 14:30:00                                 |
+-----------------------------------------------------+
```

```ppl ignore
source=people | eval `strftime(now(), "%Y-%m-%d %H:%M:%S")` = strftime(now(), "%Y-%m-%d %H:%M:%S") | fields `strftime(now(), "%Y-%m-%d %H:%M:%S")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------------------+
| strftime(now(), "%Y-%m-%d %H:%M:%S") |
|--------------------------------------|
| 2025-09-03 12:30:45                  |
+--------------------------------------+
```
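
The subsecond specifiers compose with the other specifiers, such as %T. The sketch below is derived from the specifier table and the epoch value used in the earlier examples; since 1521467703 is a whole-second input, the %3Q milliseconds should be zero. It is marked as ignored because the output is inferred rather than captured from a run.

```ppl ignore
source=people | eval `strftime(1521467703, "%T.%3Q")` = strftime(1521467703, "%T.%3Q") | fields `strftime(1521467703, "%T.%3Q")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------------+
| strftime(1521467703, "%T.%3Q") |
|--------------------------------|
| 13:55:03.000                   |
+--------------------------------+
```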
+

## STR_TO_DATE

### Description

Usage: str_to_date(string, string) is used to extract a TIMESTAMP from the first argument string using the formats specified in the second argument string.
The input argument must have enough information to be parsed as a DATE, TIMESTAMP, or TIME.
Acceptable string format specifiers are the same as those used in the [DATE_FORMAT](#date_format) function.
It returns NULL when a statement cannot be parsed due to an invalid pair of arguments, and when 0 is provided for any DATE field. Otherwise, it will return a TIMESTAMP with the parsed values (as well as default values for any field that was not parsed).
Argument type: STRING, STRING
Return type: TIMESTAMP
Example

```ppl
source=people
| eval `str_to_date("01,5,2013", "%d,%m,%Y")` = str_to_date("01,5,2013", "%d,%m,%Y")
| fields `str_to_date("01,5,2013", "%d,%m,%Y")`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------------------+
| str_to_date("01,5,2013", "%d,%m,%Y") |
|--------------------------------------|
| 2013-05-01 00:00:00                  |
+--------------------------------------+
```

## SUBDATE

### Description

Usage: subdate(date, INTERVAL expr unit) subtracts the interval expr from date; subdate(date, days) subtracts the second argument as an integer number of days from date.
If the first argument is TIME, today's date is used; if the first argument is DATE, time at midnight is used.
Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG
Return type map:
(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP
(DATE, LONG) -> DATE
(TIMESTAMP/TIME, LONG) -> TIMESTAMP
Synonyms: [DATE_SUB](#date_sub) when invoked with the INTERVAL form of the second argument.
+
Antonyms: [ADDDATE](#adddate)
Example

```ppl
source=people
| eval `'2008-01-02' - 31d` = SUBDATE(DATE('2008-01-02'), INTERVAL 31 DAY), `'2020-08-26' - 1` = SUBDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' - 1` = SUBDATE(TIMESTAMP('2020-08-26 01:01:01'), 1)
| fields `'2008-01-02' - 31d`, `'2020-08-26' - 1`, `ts '2020-08-26 01:01:01' - 1`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------+------------------+------------------------------+
| '2008-01-02' - 31d  | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 |
|---------------------+------------------+------------------------------|
| 2007-12-02 00:00:00 | 2020-08-25       | 2020-08-25 01:01:01          |
+---------------------+------------------+------------------------------+
```

## SUBTIME

### Description

Usage: subtime(expr1, expr2) subtracts expr2 from expr1 and returns the result. If the argument is TIME, today's date is used; if the argument is DATE, time at midnight is used.
Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME
Return type map:
(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP
(TIME, DATE/TIMESTAMP/TIME) -> TIME
Antonyms: [ADDTIME](#addtime)
Example

```ppl
source=people
| eval `'2008-12-12' - 0` = SUBTIME(DATE('2008-12-12'), DATE('2008-11-15'))
| fields `'2008-12-12' - 0`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------+
| '2008-12-12' - 0    |
|---------------------|
| 2008-12-12 00:00:00 |
+---------------------+
```

```ppl
source=people
| eval `'23:59:59' - 0` = SUBTIME(TIME('23:59:59'), DATE('2004-01-01'))
| fields `'23:59:59' - 0`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+
| '23:59:59' - 0 |
|----------------|
| 23:59:59       |
+----------------+
```

```ppl
source=people
| eval `'2004-01-01' - '23:59:59'` = SUBTIME(DATE('2004-01-01'), TIME('23:59:59'))
| fields `'2004-01-01' - '23:59:59'`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------------+
| '2004-01-01' - '23:59:59' |
|---------------------------|
| 2003-12-31 00:00:01       |
+---------------------------+
```

```ppl
source=people
| eval `'10:20:30' - '00:05:42'` = SUBTIME(TIME('10:20:30'), TIME('00:05:42'))
| fields `'10:20:30' - '00:05:42'`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+
| '10:20:30' - '00:05:42' |
|-------------------------|
| 10:14:48                |
+-------------------------+
```

```ppl
source=people
| eval `'2007-03-01 10:20:30' - '20:40:50'` = SUBTIME(TIMESTAMP('2007-03-01 10:20:30'), TIMESTAMP('2002-03-04 20:40:50'))
| fields `'2007-03-01 10:20:30' - '20:40:50'`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------------------------+
| '2007-03-01 10:20:30' - '20:40:50' |
|------------------------------------|
| 2007-02-28 13:39:40                |
+------------------------------------+
```

## SYSDATE

### Description

Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss[.nnnnnn]' format.
SYSDATE() returns the date and time at which it executes, in UTC. This differs from the behavior of [NOW()](#now), which returns a constant time indicating the time at which the statement began to execute.
If an argument is given, it specifies a fractional seconds precision from 0 to 6; the return value then includes a fractional seconds part with that many digits.
+
Optional argument type: INTEGER
Return type: TIMESTAMP
Specification: SYSDATE([INTEGER]) -> TIMESTAMP
Example

```ppl ignore
source=people
| eval `value_1` = SYSDATE(), `value_2` = SYSDATE(6)
| fields `value_1`, `value_2`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------+----------------------------+
| value_1             | value_2                    |
|---------------------+----------------------------|
| 2025-08-02 15:39:05 | 2025-08-02 15:39:05.123456 |
+---------------------+----------------------------+
```

## TIME

### Description

Usage: time(expr) constructs a time type with the input string expr as a time. If the argument is of date/time/timestamp type, it extracts the time value part from the expression.
Argument type: STRING/DATE/TIME/TIMESTAMP
Return type: TIME
Example

```ppl
source=people
| eval `TIME('13:49:00')` = TIME('13:49:00')
| fields `TIME('13:49:00')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------+
| TIME('13:49:00') |
|------------------|
| 13:49:00         |
+------------------+
```

```ppl
source=people
| eval `TIME('13:49')` = TIME('13:49')
| fields `TIME('13:49')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------+
| TIME('13:49') |
|---------------|
| 13:49:00      |
+---------------+
```

```ppl
source=people
| eval `TIME('2020-08-26 13:49:00')` = TIME('2020-08-26 13:49:00')
| fields `TIME('2020-08-26 13:49:00')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------+
| TIME('2020-08-26 13:49:00') |
|-----------------------------|
| 13:49:00                    |
+-----------------------------+
```

```ppl
source=people
| eval `TIME('2020-08-26 13:49')` = TIME('2020-08-26 13:49')
| fields `TIME('2020-08-26 13:49')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------+
| TIME('2020-08-26 13:49') |
|--------------------------|
| 13:49:00                 |
+--------------------------+
```

## TIME_FORMAT

### Description

Usage: time_format(time, format) formats the time argument using the specifiers in the format argument.
This supports a subset of the time format specifiers available for the [date_format](#date_format) function.
Using date format specifiers supported by [date_format](#date_format) but not listed below will return 0 or null.
Acceptable format specifiers are listed in the table below.
If an argument of type DATE is passed in, it is treated as a TIMESTAMP at midnight (i.e., 00:00:00).
The following table describes the available specifier arguments.
+


| Specifier | Description |
| --- | --- |
| %f | Microseconds (000000..999999) |
| %H | Hour (00..23) |
| %h | Hour (01..12) |
| %I | Hour (01..12) |
| %i | Minutes, numeric (00..59) |
| %p | AM or PM |
| %r | Time, 12-hour (hh:mm:ss followed by AM or PM) |
| %S | Seconds (00..59) |
| %s | Seconds (00..59) |
| %T | Time, 24-hour (hh:mm:ss) |


Argument type: STRING/DATE/TIME/TIMESTAMP, STRING
Return type: STRING
Example

```ppl
source=people
| eval `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` = TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')
| fields `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------------------------------------------------------------------+
| TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') |
|----------------------------------------------------------------------------|
| 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15                           |
+----------------------------------------------------------------------------+
```

## TIME_TO_SEC

### Description

Usage: time_to_sec(time) returns the time argument, converted to seconds.
Argument type: STRING/TIME/TIMESTAMP
Return type: LONG
Example

```ppl
source=people
| eval `TIME_TO_SEC(TIME('22:23:00'))` = TIME_TO_SEC(TIME('22:23:00'))
| fields `TIME_TO_SEC(TIME('22:23:00'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------------+
| TIME_TO_SEC(TIME('22:23:00')) |
|-------------------------------|
| 80580                         |
+-------------------------------+
```

## TIMEDIFF

### Description

Usage: timediff(time1, time2) returns the difference between two time expressions as a time.
Argument type: TIME, TIME
Return type: TIME
Example

```ppl
source=people
| eval `TIMEDIFF('23:59:59', '13:00:00')` = TIMEDIFF('23:59:59', '13:00:00')
| fields `TIMEDIFF('23:59:59', '13:00:00')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------------------------+
| TIMEDIFF('23:59:59', '13:00:00') |
|----------------------------------|
| 10:59:59                         |
+----------------------------------+
```

## TIMESTAMP

### Description

Usage: timestamp(expr) constructs a timestamp type with the input string `expr` as a timestamp. If the argument is not a string, it casts `expr` to timestamp type with the default timezone UTC. If the argument is a time, today's date is applied before the cast.
With two arguments, `timestamp(expr1, expr2)` adds the time expression `expr2` to the date or timestamp expression `expr1` and returns the result as a timestamp value.
+
Argument type: STRING/DATE/TIME/TIMESTAMP
Return type map:
(STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP
(STRING/DATE/TIME/TIMESTAMP, STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP
Example

```ppl
source=people
| eval `TIMESTAMP('2020-08-26 13:49:00')` = TIMESTAMP('2020-08-26 13:49:00'), `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))` = TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))
| fields `TIMESTAMP('2020-08-26 13:49:00')`, `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------------------------+----------------------------------------------------+
| TIMESTAMP('2020-08-26 13:49:00') | TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) |
|----------------------------------+----------------------------------------------------|
| 2020-08-26 13:49:00              | 2020-08-27 02:04:42                                 |
+----------------------------------+----------------------------------------------------+
```

## TIMESTAMPADD

### Description

Usage: timestampadd(interval, integer, datetime) adds the given number of interval units to the DATE/TIME/TIMESTAMP/STRING argument and returns the result as a TIMESTAMP.
If the third argument is a STRING, it must be formatted as a valid TIMESTAMP. If only a TIME is provided, a TIMESTAMP is still returned with the DATE portion filled in using the current date.
If the third argument is a DATE, it will be automatically converted to a TIMESTAMP.
Argument type: INTERVAL, INTEGER, DATE/TIME/TIMESTAMP/STRING
Return type: TIMESTAMP
INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR]
Examples

```ppl
source=people
| eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')
| eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')
| fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------------------------------------+--------------------------------------------------+
| TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') |
|----------------------------------------------+--------------------------------------------------|
| 2000-01-18 00:00:00                          | 1999-10-01 00:00:00                               |
+----------------------------------------------+--------------------------------------------------+
```
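
When the third argument is a TIME, the DATE portion is taken from the current date, so the result changes from day to day. The sketch below is therefore marked as ignored and uses 2025-10-03 purely as an illustrative execution date (the same placeholder date as the UTC_DATE example later in this document); the only fixed part of the result is the time portion, 10:00:00 plus one hour:

```ppl ignore
source=people
| eval `TIMESTAMPADD(HOUR, 1, TIME('10:00:00'))` = TIMESTAMPADD(HOUR, 1, TIME('10:00:00'))
| fields `TIMESTAMPADD(HOUR, 1, TIME('10:00:00'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------------------------------+
| TIMESTAMPADD(HOUR, 1, TIME('10:00:00'))  |
|------------------------------------------|
| 2025-10-03 11:00:00                      |
+------------------------------------------+
```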
+

## TIMESTAMPDIFF

### Description

Usage: TIMESTAMPDIFF(interval, start, end) returns the difference between the start and end date/times in interval units.
If a TIME is provided as an argument, it will be converted to a TIMESTAMP with the DATE portion filled in using the current date.
Arguments will be automatically converted to a TIME/TIMESTAMP when appropriate.
Any argument that is a STRING must be formatted as a valid TIMESTAMP.
Argument type: INTERVAL, DATE/TIME/TIMESTAMP/STRING, DATE/TIME/TIMESTAMP/STRING
INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR]
Examples

```ppl
source=people
| eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')
| eval `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))
| fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------------------------------------------------+-----------------------------------------------------------+
| TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) |
|-------------------------------------------------------------------+-----------------------------------------------------------|
| 4                                                                 | -23                                                       |
+-------------------------------------------------------------------+-----------------------------------------------------------+
```

## TO_DAYS

### Description

Usage: to_days(date) returns the day number (the number of days since year 0) of the given date. Returns NULL if date is invalid.
Argument type: STRING/DATE/TIMESTAMP
Return type: LONG
Example

```ppl
source=people
| eval `TO_DAYS(DATE('2008-10-07'))` = TO_DAYS(DATE('2008-10-07'))
| fields `TO_DAYS(DATE('2008-10-07'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------+
| TO_DAYS(DATE('2008-10-07')) |
|-----------------------------|
| 733687                      |
+-----------------------------+
```

## TO_SECONDS

### Description

Usage: to_seconds(date) returns the number of seconds since the year 0 of the given value. Returns NULL if the value is invalid.
An argument of a LONG type can be used. It must be formatted as YMMDD, YYMMDD, YYYMMDD or YYYYMMDD. Note that a LONG type argument cannot have leading 0s as it will be parsed using an octal numbering system.
Argument type: STRING/LONG/DATE/TIME/TIMESTAMP
Return type: LONG
Example

```ppl
source=people
| eval `TO_SECONDS(DATE('2008-10-07'))` = TO_SECONDS(DATE('2008-10-07'))
| eval `TO_SECONDS(950228)` = TO_SECONDS(950228)
| fields `TO_SECONDS(DATE('2008-10-07'))`, `TO_SECONDS(950228)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------------+--------------------+
| TO_SECONDS(DATE('2008-10-07')) | TO_SECONDS(950228) |
|--------------------------------+--------------------|
| 63390556800                    | 62961148800        |
+--------------------------------+--------------------+
```

## UNIX_TIMESTAMP

### Description

Usage: Converts the given argument to Unix time (seconds since the Epoch, the very beginning of year 1970). If no argument is given, it returns the current Unix time.
The date argument may be a DATE or TIMESTAMP string, or a number in YYMMDD, YYMMDDhhmmss, YYYYMMDD, or YYYYMMDDhhmmss format. If the argument includes a time part, it may optionally include a fractional seconds part.
If the argument is in an invalid format or outside of the range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), the function returns NULL.
You can use [FROM_UNIXTIME](#from_unixtime) to do the reverse conversion.
+
Argument type: DOUBLE/DATE/TIMESTAMP
Return type: DOUBLE
Example

```ppl
source=people
| eval `UNIX_TIMESTAMP(double)` = UNIX_TIMESTAMP(20771122143845), `UNIX_TIMESTAMP(timestamp)` = UNIX_TIMESTAMP(TIMESTAMP('1996-11-15 17:05:42'))
| fields `UNIX_TIMESTAMP(double)`, `UNIX_TIMESTAMP(timestamp)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------------+---------------------------+
| UNIX_TIMESTAMP(double) | UNIX_TIMESTAMP(timestamp) |
|------------------------+---------------------------|
| 3404817525.0           | 848077542.0               |
+------------------------+---------------------------+
```

## UTC_DATE

### Description

Returns the current UTC date as a value in 'YYYY-MM-DD' format.
Return type: DATE
Specification: UTC_DATE() -> DATE
Example

```ppl ignore
source=people
| eval `UTC_DATE()` = UTC_DATE()
| fields `UTC_DATE()`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------+
| UTC_DATE() |
|------------|
| 2025-10-03 |
+------------+
```

## UTC_TIME

### Description

Returns the current UTC time as a value in 'hh:mm:ss' format.
Return type: TIME
Specification: UTC_TIME() -> TIME
Example

```ppl ignore
source=people
| eval `UTC_TIME()` = UTC_TIME()
| fields `UTC_TIME()`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------+
| UTC_TIME() |
|------------|
| 17:54:27   |
+------------+
```

## UTC_TIMESTAMP

### Description

Returns the current UTC timestamp as a value in 'YYYY-MM-DD hh:mm:ss' format.
Return type: TIMESTAMP
Specification: UTC_TIMESTAMP() -> TIMESTAMP
Example

```ppl ignore
source=people
| eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP()
| fields `UTC_TIMESTAMP()`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------+
| UTC_TIMESTAMP()     |
|---------------------|
| 2025-10-03 17:54:28 |
+---------------------+
```

## WEEK

### Description

Usage: week(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used.
The following table describes how the mode argument works.


| Mode | First day of week | Range | Week 1 is the first week ... |
| --- | --- | --- | --- |
| 0 | Sunday | 0-53 | with a Sunday in this year |
| 1 | Monday | 0-53 | with 4 or more days this year |
| 2 | Sunday | 1-53 | with a Sunday in this year |
| 3 | Monday | 1-53 | with 4 or more days this year |
| 4 | Sunday | 0-53 | with 4 or more days this year |
| 5 | Monday | 0-53 | with a Monday in this year |
| 6 | Sunday | 1-53 | with 4 or more days this year |
| 7 | Monday | 1-53 | with a Monday in this year |


Argument type: DATE/TIMESTAMP/STRING
Return type: INTEGER
Synonyms: [WEEK_OF_YEAR](#week_of_year)
Example

```ppl
source=people
| eval `WEEK(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK(DATE('2008-02-20'), 1)` = WEEK(DATE('2008-02-20'), 1)
| fields `WEEK(DATE('2008-02-20'))`, `WEEK(DATE('2008-02-20'), 1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------+-----------------------------+
| WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) |
|--------------------------+-----------------------------|
| 7                        | 8                           |
+--------------------------+-----------------------------+
```

## WEEKDAY

### Description

Usage: weekday(date) returns the weekday index for date (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).
It is similar to the [dayofweek](#dayofweek) function, but returns different indexes for each day.
Argument type: STRING/DATE/TIME/TIMESTAMP
Return type: INTEGER
Example

```ppl
source=people
| eval `weekday(DATE('2020-08-26'))` = weekday(DATE('2020-08-26'))
| eval `weekday(DATE('2020-08-27'))` = weekday(DATE('2020-08-27'))
| fields `weekday(DATE('2020-08-26'))`, `weekday(DATE('2020-08-27'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------+-----------------------------+
| weekday(DATE('2020-08-26')) | weekday(DATE('2020-08-27')) |
|-----------------------------+-----------------------------|
| 2                           | 3                           |
+-----------------------------+-----------------------------+
```
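
To make the index difference concrete, the example below evaluates both functions on the same Wednesday: weekday is Monday-based, so 2020-08-26 yields 2, while dayofweek is Sunday-based and yields 4 for the same date, matching the values documented for each function.

```ppl
source=people
| eval `weekday(DATE('2020-08-26'))` = weekday(DATE('2020-08-26')), `dayofweek(DATE('2020-08-26'))` = dayofweek(DATE('2020-08-26'))
| fields `weekday(DATE('2020-08-26'))`, `dayofweek(DATE('2020-08-26'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------+-------------------------------+
| weekday(DATE('2020-08-26')) | dayofweek(DATE('2020-08-26')) |
|-----------------------------+-------------------------------|
| 2                           | 4                             |
+-----------------------------+-------------------------------+
```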
+

## WEEK_OF_YEAR

### Description

Usage: week_of_year(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used.
The following table describes how the mode argument works.


| Mode | First day of week | Range | Week 1 is the first week ... |
| --- | --- | --- | --- |
| 0 | Sunday | 0-53 | with a Sunday in this year |
| 1 | Monday | 0-53 | with 4 or more days this year |
| 2 | Sunday | 1-53 | with a Sunday in this year |
| 3 | Monday | 1-53 | with 4 or more days this year |
| 4 | Sunday | 0-53 | with 4 or more days this year |
| 5 | Monday | 0-53 | with a Monday in this year |
| 6 | Sunday | 1-53 | with 4 or more days this year |
| 7 | Monday | 1-53 | with a Monday in this year |


Argument type: DATE/TIMESTAMP/STRING
Return type: INTEGER
Synonyms: [WEEK](#week)
Example

```ppl
source=people
| eval `WEEK_OF_YEAR(DATE('2008-02-20'))` = WEEK_OF_YEAR(DATE('2008-02-20')), `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` = WEEK_OF_YEAR(DATE('2008-02-20'), 1)
| fields `WEEK_OF_YEAR(DATE('2008-02-20'))`, `WEEK_OF_YEAR(DATE('2008-02-20'), 1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------------------------+-------------------------------------+
| WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) |
|----------------------------------+-------------------------------------|
| 7                                | 8                                   |
+----------------------------------+-------------------------------------+
```

## YEAR

### Description

Usage: year(date) returns the year for date, in the range 1000 to 9999, or 0 for the “zero” date.
Argument type: STRING/DATE/TIMESTAMP
Return type: INTEGER
Example

```ppl
source=people
| eval `YEAR(DATE('2020-08-26'))` = YEAR(DATE('2020-08-26'))
| fields `YEAR(DATE('2020-08-26'))`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------+
| YEAR(DATE('2020-08-26')) |
|--------------------------|
| 2020                     |
+--------------------------+
```

## YEARWEEK

### Description

Usage: yearweek(date[, mode]) returns the year and week for date as an integer. It accepts an optional mode argument aligned with those available for the [WEEK](#week) function.
+Argument type: STRING/DATE/TIME/TIMESTAMP +Return type: INTEGER +Example + +```ppl + +source=people +| eval `YEARWEEK('2020-08-26')` = YEARWEEK('2020-08-26') +| eval `YEARWEEK('2019-01-05', 1)` = YEARWEEK('2019-01-05', 1) +| fields `YEARWEEK('2020-08-26')`, `YEARWEEK('2019-01-05', 1)` + +``` + +Expected output: + +```text + +fetched rows / total rows = 1/1 ++------------------------+---------------------------+ +| YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 1) | +|------------------------+---------------------------| +| 202034 | 201901 | + ++------------------------+---------------------------+ + +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/datetime.rst b/docs/user/ppl/functions/datetime.rst deleted file mode 100644 index bd69425a2dd..00000000000 --- a/docs/user/ppl/functions/datetime.rst +++ /dev/null @@ -1,2360 +0,0 @@ -======================= -Date and Time Functions -======================= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -.. note:: - - All PPL date and time functions use the UTC time zone. Both input and output values are interpreted as UTC. - For instance, an input timestamp literal like '2020-08-26 01:01:01' is assumed to be in UTC, and the now() - function also returns the current date and time in UTC. - -ADDDATE -------- - -Description ->>>>>>>>>>> - -Usage: adddate(date, INTERVAL expr unit) / adddate(date, days) adds the interval of second argument to date; adddate(date, days) adds the second argument as integer number of days to date. -If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. - -Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG - -Return type map: - -(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP - -(DATE, LONG) -> DATE - -(TIMESTAMP/TIME, LONG) -> TIMESTAMP - -Synonyms: `DATE_ADD`_ when invoked with the INTERVAL form of the second argument. - -Antonyms: `SUBDATE`_ - -Example:: - - os> source=people | eval `'2020-08-26' + 1h` = ADDDATE(DATE('2020-08-26'), INTERVAL 1 HOUR), `'2020-08-26' + 1` = ADDDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' + 1` = ADDDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2020-08-26' + 1h`, `'2020-08-26' + 1`, `ts '2020-08-26 01:01:01' + 1` - fetched rows / total rows = 1/1 - +---------------------+------------------+------------------------------+ - | '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | - |---------------------+------------------+------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | - +---------------------+------------------+------------------------------+ - - - -ADDTIME -------- - -Description ->>>>>>>>>>> - -Usage: addtime(expr1, expr2) adds expr2 to expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. 
- -Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME - -Return type map: - -(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP - -(TIME, DATE/TIMESTAMP/TIME) -> TIME - -Antonyms: `SUBTIME`_ - -Example:: - - os> source=people | eval `'2008-12-12' + 0` = ADDTIME(DATE('2008-12-12'), DATE('2008-11-15')) | fields `'2008-12-12' + 0` - fetched rows / total rows = 1/1 - +---------------------+ - | '2008-12-12' + 0 | - |---------------------| - | 2008-12-12 00:00:00 | - +---------------------+ - - os> source=people | eval `'23:59:59' + 0` = ADDTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' + 0` - fetched rows / total rows = 1/1 - +----------------+ - | '23:59:59' + 0 | - |----------------| - | 23:59:59 | - +----------------+ - - os> source=people | eval `'2004-01-01' + '23:59:59'` = ADDTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' + '23:59:59'` - fetched rows / total rows = 1/1 - +---------------------------+ - | '2004-01-01' + '23:59:59' | - |---------------------------| - | 2004-01-01 23:59:59 | - +---------------------------+ - - os> source=people | eval `'10:20:30' + '00:05:42'` = ADDTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' + '00:05:42'` - fetched rows / total rows = 1/1 - +-------------------------+ - | '10:20:30' + '00:05:42' | - |-------------------------| - | 10:26:12 | - +-------------------------+ - - os> source=people | eval `'2007-02-28 10:20:30' + '20:40:50'` = ADDTIME(TIMESTAMP('2007-02-28 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-02-28 10:20:30' + '20:40:50'` - fetched rows / total rows = 1/1 - +------------------------------------+ - | '2007-02-28 10:20:30' + '20:40:50' | - |------------------------------------| - | 2007-03-01 07:01:20 | - +------------------------------------+ - - -CONVERT_TZ ----------- - -Description ->>>>>>>>>>> - -Usage: convert_tz(timestamp, from_timezone, to_timezone) constructs a local timestamp converted from the from_timezone to the to_timezone. CONVERT_TZ returns null when any of the three function arguments are invalid, i.e. timestamp is not in the format yyyy-MM-dd HH:mm:ss or the timeszone is not in (+/-)HH:mm. It also is invalid for invalid dates, such as February 30th and invalid timezones, which are ones outside of -13:59 and +14:00. - -Argument type: TIMESTAMP/STRING, STRING, STRING - -Return type: TIMESTAMP - -Conversion from +00:00 timezone to +10:00 timezone. Returns the timestamp argument converted from +00:00 to +10:00 -Example:: - - os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | fields `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | - |-----------------------------------------------------| - | 2008-05-15 22:00:00 | - +-----------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +15:00 in this example will return null. 
-Example:: - - os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+15:00')| fields `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+00:00','+15:00') | - |-----------------------------------------------------| - | null | - +-----------------------------------------------------+ - -Conversion from a positive timezone to a negative timezone that goes over date line. -Example:: - - os> source=people | eval `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` = convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | - |-----------------------------------------------------| - | 2008-05-14 22:30:00 | - +-----------------------------------------------------+ - -Valid dates are required in convert_tz, invalid dates such as April 31st (not a date in the Gregorian calendar) will result in null. -Example:: - - os> source=people | eval `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` = convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | - |-----------------------------------------------------| - | null | - +-----------------------------------------------------+ - -Valid dates are required in convert_tz, invalid dates such as February 30th (not a date in the Gregorian calendar) will result in null. -Example:: - - os> source=people | eval `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | - |-----------------------------------------------------| - | null | - +-----------------------------------------------------+ - -February 29th 2008 is a valid date because it is a leap year. -Example:: - - os> source=people | eval `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | - |-----------------------------------------------------| - | 2008-02-28 22:30:00 | - +-----------------------------------------------------+ - -Valid dates are required in convert_tz, invalid dates such as February 29th 2007 (2007 is not a leap year) will result in null. 
-Example:: - - os> source=people | eval `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | - |-----------------------------------------------------| - | null | - +-----------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:01 in this example will return null. -Example:: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | - |-----------------------------------------------------| - | null | - +-----------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:00 in this example will return a correctly converted date time object. -Example:: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | - |-----------------------------------------------------| - | 2008-01-31 22:00:00 | - +-----------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as -14:00 will result in null -Example:: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | - |-----------------------------------------------------| - | null | - +-----------------------------------------------------+ - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. This timezone is within range so it is valid and will convert the time. -Example:: - - os> source=people | eval `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` = convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` - fetched rows / total rows = 1/1 - +-----------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | - |-----------------------------------------------------| - | 2008-02-02 01:59:00 | - +-----------------------------------------------------+ - - -CURDATE -------- - -Description ->>>>>>>>>>> - -Returns the current date as a value in 'YYYY-MM-DD' format. -CURDATE() returns the current date in UTC at the time the statement is executed. 
- - -Return type: DATE - -Specification: CURDATE() -> DATE - -Example:: - - > source=people | eval `CURDATE()` = CURDATE() | fields `CURDATE()` - fetched rows / total rows = 1/1 - +------------+ - | CURDATE() | - |------------| - | 2022-08-02 | - +------------+ - - -CURRENT_DATE ------------- - -Description ->>>>>>>>>>> - -`CURRENT_DATE()` is a synonym for `CURDATE() <#curdate>`_. - -Example:: - - > source=people | eval `CURRENT_DATE()` = CURRENT_DATE() | fields `CURRENT_DATE()` - fetched rows / total rows = 1/1 - +------------------+ - | CURRENT_DATE() | - |------------------+ - | 2022-08-02 | - +------------------+ - - -CURRENT_TIME ------------- - -Description ->>>>>>>>>>> - -`CURRENT_TIME()` is a synonym for `CURTIME() <#curtime>`_. - -Example:: - - > source=people | eval `CURRENT_TIME()` = CURRENT_TIME() | fields `CURRENT_TIME()` - fetched rows / total rows = 1/1 - +------------------+ - | CURRENT_TIME() | - |------------------+ - | 15:39:05 | - +------------------+ - - -CURRENT_TIMESTAMP ------------------ - -Description ->>>>>>>>>>> - -`CURRENT_TIMESTAMP()` is a synonym for `NOW() <#now>`_. - -Example:: - - > source=people | eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP() | fields `CURRENT_TIMESTAMP()` - fetched rows / total rows = 1/1 - +-----------------------+ - | CURRENT_TIMESTAMP() | - |-----------------------+ - | 2022-08-02 15:54:19 | - +-----------------------+ - - -CURTIME -------- - -Description ->>>>>>>>>>> - -Returns the current time as a value in 'hh:mm:ss' format in the UTC time zone. -CURTIME() returns the time at which the statement began to execute as `NOW() <#now>`_ does. - -Return type: TIME - -Specification: CURTIME() -> TIME - -Example:: - - > source=people | eval `value_1` = CURTIME(), `value_2` = CURTIME() | fields `value_1`, `value_2` - fetched rows / total rows = 1/1 - +----------+----------+ - | value_1 | value_2 | - |----------+----------| - | 15:39:05 | 15:39:05 | - +----------+----------+ - - -DATE ----- - -Description ->>>>>>>>>>> - -Usage: date(expr) constructs a date type with the input string expr as a date. If the argument is of date/timestamp, it extracts the date value part from the expression. 
- -Argument type: STRING/DATE/TIMESTAMP - -Return type: DATE - -Example:: - - os> source=people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')` - fetched rows / total rows = 1/1 - +--------------------+ - | DATE('2020-08-26') | - |--------------------| - | 2020-08-26 | - +--------------------+ - - os> source=people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))` - fetched rows / total rows = 1/1 - +----------------------------------------+ - | DATE(TIMESTAMP('2020-08-26 13:49:00')) | - |----------------------------------------| - | 2020-08-26 | - +----------------------------------------+ - - os> source=people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')` - fetched rows / total rows = 1/1 - +--------------------------+ - | DATE('2020-08-26 13:49') | - |--------------------------| - | 2020-08-26 | - +--------------------------+ - - os> source=people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')` - fetched rows / total rows = 1/1 - +--------------------------+ - | DATE('2020-08-26 13:49') | - |--------------------------| - | 2020-08-26 | - +--------------------------+ - - -DATE_ADD --------- - -Description ->>>>>>>>>>> - -Usage: date_add(date, INTERVAL expr unit) adds the interval expr to date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. - -Argument type: DATE/TIMESTAMP/TIME, INTERVAL - -Return type: TIMESTAMP - -Synonyms: `ADDDATE`_ - -Antonyms: `DATE_SUB`_ - -Example:: - - os> source=people | eval `'2020-08-26' + 1h` = DATE_ADD(DATE('2020-08-26'), INTERVAL 1 HOUR), `ts '2020-08-26 01:01:01' + 1d` = DATE_ADD(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 DAY) | fields `'2020-08-26' + 1h`, `ts '2020-08-26 01:01:01' + 1d` - fetched rows / total rows = 1/1 - +---------------------+-------------------------------+ - | '2020-08-26' + 1h | ts '2020-08-26 01:01:01' + 1d | - |---------------------+-------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 01:01:01 | - +---------------------+-------------------------------+ - - -DATE_FORMAT ------------ - -Description ->>>>>>>>>>> - -Usage: date_format(date, format) formats the date argument using the specifiers in the format argument. -If an argument of type TIME is provided, the local date is used. - -.. list-table:: The following table describes the available specifier arguments. - :widths: 20 80 - :header-rows: 1 - - * - Specifier - - Description - * - %a - - Abbreviated weekday name (Sun..Sat) - * - %b - - Abbreviated month name (Jan..Dec) - * - %c - - Month, numeric (0..12) - * - %D - - Day of the month with English suffix (0th, 1st, 2nd, 3rd, ...) 
- * - %d - - Day of the month, numeric (00..31) - * - %e - - Day of the month, numeric (0..31) - * - %f - - Microseconds (000000..999999) - * - %H - - Hour (00..23) - * - %h - - Hour (01..12) - * - %I - - Hour (01..12) - * - %i - - Minutes, numeric (00..59) - * - %j - - Day of year (001..366) - * - %k - - Hour (0..23) - * - %l - - Hour (1..12) - * - %M - - Month name (January..December) - * - %m - - Month, numeric (00..12) - * - %p - - AM or PM - * - %r - - Time, 12-hour (hh:mm:ss followed by AM or PM) - * - %S - - Seconds (00..59) - * - %s - - Seconds (00..59) - * - %T - - Time, 24-hour (hh:mm:ss) - * - %U - - Week (00..53), where Sunday is the first day of the week; WEEK() mode 0 - * - %u - - Week (00..53), where Monday is the first day of the week; WEEK() mode 1 - * - %V - - Week (01..53), where Sunday is the first day of the week; WEEK() mode 2; used with %X - * - %v - - Week (01..53), where Monday is the first day of the week; WEEK() mode 3; used with %x - * - %W - - Weekday name (Sunday..Saturday) - * - %w - - Day of the week (0=Sunday..6=Saturday) - * - %X - - Year for the week where Sunday is the first day of the week, numeric, four digits; used with %V - * - %x - - Year for the week, where Monday is the first day of the week, numeric, four digits; used with %v - * - %Y - - Year, numeric, four digits - * - %y - - Year, numeric (two digits) - * - %% - - A literal % character - * - %x - - x, for any “x” not listed above - * - x - - x, for any smallcase/uppercase alphabet except [aydmshiHIMYDSEL] - -Argument type: STRING/DATE/TIME/TIMESTAMP, STRING - -Return type: STRING - -Example:: - - os> source=people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')` = DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` - fetched rows / total rows = 1/1 - +----------------------------------------------------+---------------------------------------------------------------------+ - | DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | - |----------------------------------------------------+---------------------------------------------------------------------| - | 13:14:15.012345 | 1998-Jan-31st 01:14:15 PM | - +----------------------------------------------------+---------------------------------------------------------------------+ - - -DATETIME --------- - -Description ->>>>>>>>>>> - -Usage: DATETIME(timestamp)/ DATETIME(date, to_timezone) Converts the datetime to a new timezone - -Argument type: timestamp/STRING - -Return type map: - -(TIMESTAMP, STRING) -> TIMESTAMP - -(TIMESTAMP) -> TIMESTAMP - - -Converting timestamp with timezone to the second argument timezone. -Example:: - - os> source=people | eval `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` = DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | fields `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------------+ - | DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | - |-------------------------------------------------| - | 2004-02-29 19:00:00 | - +-------------------------------------------------+ - - -The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. 
Timezones outside of the range will result in null. -Example:: - - os> source=people | eval `DATETIME('2008-01-01 02:00:00', '-14:00')` = DATETIME('2008-01-01 02:00:00', '-14:00') | fields `DATETIME('2008-01-01 02:00:00', '-14:00')` - fetched rows / total rows = 1/1 - +-------------------------------------------+ - | DATETIME('2008-01-01 02:00:00', '-14:00') | - |-------------------------------------------| - | null | - +-------------------------------------------+ - - -DATE_SUB --------- - -Description ->>>>>>>>>>> - -Usage: date_sub(date, INTERVAL expr unit) subtracts the interval expr from date. If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. - -Argument type: DATE/TIMESTAMP/TIME, INTERVAL - -Return type: TIMESTAMP - -Synonyms: `SUBDATE`_ - -Antonyms: `DATE_ADD`_ - -Example:: - - os> source=people | eval `'2008-01-02' - 31d` = DATE_SUB(DATE('2008-01-02'), INTERVAL 31 DAY), `ts '2020-08-26 01:01:01' + 1h` = DATE_SUB(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 HOUR) | fields `'2008-01-02' - 31d`, `ts '2020-08-26 01:01:01' + 1h` - fetched rows / total rows = 1/1 - +---------------------+-------------------------------+ - | '2008-01-02' - 31d | ts '2020-08-26 01:01:01' + 1h | - |---------------------+-------------------------------| - | 2007-12-02 00:00:00 | 2020-08-26 00:01:01 | - +---------------------+-------------------------------+ - - -DATEDIFF --------- - -Usage: Calculates the difference of date parts of given values. If the first argument is time, today's date is used. - -Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME - -Return type: LONG - -Example:: - - os> source=people | eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')), `today - today` = DATEDIFF(TIME('23:59:59'), TIME('00:00:00')) | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`, `today - today` - fetched rows / total rows = 1/1 - +-----------------------------+-----------------------------+---------------+ - | '2000-01-02' - '2000-01-01' | '2001-02-01' - '2004-01-01' | today - today | - |-----------------------------+-----------------------------+---------------| - | 1 | -1064 | 0 | - +-----------------------------+-----------------------------+---------------+ - - -DAY ---- - -Description ->>>>>>>>>>> - -Usage: day(date) extracts the day of the month for date, in the range 1 to 31. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `DAYOFMONTH`_, `DAY_OF_MONTH`_ - -Example:: - - os> source=people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-------------------------+ - | DAY(DATE('2020-08-26')) | - |-------------------------| - | 26 | - +-------------------------+ - - -DAYNAME -------- - -Description ->>>>>>>>>>> - -Usage: dayname(date) returns the name of the weekday for date, including Monday, Tuesday, Wednesday, Thursday, Friday, Saturday and Sunday. 
- -Argument type: STRING/DATE/TIMESTAMP - -Return type: STRING - -Example:: - - os> source=people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-----------------------------+ - | DAYNAME(DATE('2020-08-26')) | - |-----------------------------| - | Wednesday | - +-----------------------------+ - - -DAYOFMONTH ----------- - -Description ->>>>>>>>>>> - -Usage: dayofmonth(date) extracts the day of the month for date, in the range 1 to 31. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `DAY`_, `DAY_OF_MONTH`_ - -Example:: - - os> source=people | eval `DAYOFMONTH(DATE('2020-08-26'))` = DAYOFMONTH(DATE('2020-08-26')) | fields `DAYOFMONTH(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +--------------------------------+ - | DAYOFMONTH(DATE('2020-08-26')) | - |--------------------------------| - | 26 | - +--------------------------------+ - - -DAY_OF_MONTH ------------- - -Description ->>>>>>>>>>> - -Usage: day_of_month(date) extracts the day of the month for date, in the range 1 to 31. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `DAY`_, `DAYOFMONTH`_ - -Example:: - - os> source=people | eval `DAY_OF_MONTH(DATE('2020-08-26'))` = DAY_OF_MONTH(DATE('2020-08-26')) | fields `DAY_OF_MONTH(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +----------------------------------+ - | DAY_OF_MONTH(DATE('2020-08-26')) | - |----------------------------------| - | 26 | - +----------------------------------+ - - -DAYOFWEEK ---------- - -Description ->>>>>>>>>>> - -Usage: dayofweek(date) returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `DAY_OF_WEEK`_ - -Example:: - - os> source=people | eval `DAYOFWEEK(DATE('2020-08-26'))` = DAYOFWEEK(DATE('2020-08-26')) | fields `DAYOFWEEK(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-------------------------------+ - | DAYOFWEEK(DATE('2020-08-26')) | - |-------------------------------| - | 4 | - +-------------------------------+ - - -DAY_OF_WEEK ------------ - -Description ->>>>>>>>>>> - -Usage: day_of_week(date) returns the weekday index for date (1 = Sunday, 2 = Monday, ..., 7 = Saturday). - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `DAYOFWEEK`_ - -Example:: - - os> source=people | eval `DAY_OF_WEEK(DATE('2020-08-26'))` = DAY_OF_WEEK(DATE('2020-08-26')) | fields `DAY_OF_WEEK(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +---------------------------------+ - | DAY_OF_WEEK(DATE('2020-08-26')) | - |---------------------------------| - | 4 | - +---------------------------------+ - - -DAYOFYEAR ---------- - -Description ->>>>>>>>>>> - -Usage: dayofyear(date) returns the day of the year for date, in the range 1 to 366. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `DAY_OF_YEAR`_ - -Example:: - - os> source=people | eval `DAYOFYEAR(DATE('2020-08-26'))` = DAYOFYEAR(DATE('2020-08-26')) | fields `DAYOFYEAR(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-------------------------------+ - | DAYOFYEAR(DATE('2020-08-26')) | - |-------------------------------| - | 239 | - +-------------------------------+ - - -DAY_OF_YEAR ------------ - -Description ->>>>>>>>>>> - -Usage: day_of_year(date) returns the day of the year for date, in the range 1 to 366. 
- -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `DAYOFYEAR`_ - -Example:: - - os> source=people | eval `DAY_OF_YEAR(DATE('2020-08-26'))` = DAY_OF_YEAR(DATE('2020-08-26')) | fields `DAY_OF_YEAR(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +---------------------------------+ - | DAY_OF_YEAR(DATE('2020-08-26')) | - |---------------------------------| - | 239 | - +---------------------------------+ - - -EXTRACT -------- - -Description ->>>>>>>>>>> - -Usage: extract(part FROM date) returns a LONG with digits in order according to the given 'part' arguments. -The specific format of the returned long is determined by the table below. - -Argument type: PART, where PART is one of the following tokens in the table below. - -The format specifiers found in this table are the same as those found in the `DATE_FORMAT`_ function. - -.. list-table:: The following table describes the mapping of a 'part' to a particular format. - :widths: 20 80 - :header-rows: 1 - - * - Part - - Format - * - MICROSECOND - - %f - * - SECOND - - %s - * - MINUTE - - %i - * - HOUR - - %H - * - DAY - - %d - * - WEEK - - %X - * - MONTH - - %m - * - YEAR - - %V - * - SECOND_MICROSECOND - - %s%f - * - MINUTE_MICROSECOND - - %i%s%f - * - MINUTE_SECOND - - %i%s - * - HOUR_MICROSECOND - - %H%i%s%f - * - HOUR_SECOND - - %H%i%s - * - HOUR_MINUTE - - %H%i - * - DAY_MICROSECOND - - %d%H%i%s%f - * - DAY_SECOND - - %d%H%i%s - * - DAY_MINUTE - - %d%H%i - * - DAY_HOUR - - %d%H% - * - YEAR_MONTH - - %V%m - -Return type: LONG - -Example:: - - os> source=people | eval `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` = extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` - fetched rows / total rows = 1/1 - +------------------------------------------------+ - | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | - |------------------------------------------------| - | 202302 | - +------------------------------------------------+ - - -FROM_DAYS ---------- - -Description ->>>>>>>>>>> - -Usage: from_days(N) returns the date value given the day number N. - -Argument type: INTEGER/LONG - -Return type: DATE - -Example:: - - os> source=people | eval `FROM_DAYS(733687)` = FROM_DAYS(733687) | fields `FROM_DAYS(733687)` - fetched rows / total rows = 1/1 - +-------------------+ - | FROM_DAYS(733687) | - |-------------------| - | 2008-10-07 | - +-------------------+ - - -FROM_UNIXTIME -------------- - -Description ->>>>>>>>>>> - -Usage: Returns a representation of the argument given as a timestamp or character string value. Perform reverse conversion for `UNIX_TIMESTAMP`_ function. -If second argument is provided, it is used to format the result in the same way as the format string used for the `DATE_FORMAT`_ function. -If timestamp is outside of range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), function returns NULL. 
-Argument type: DOUBLE, STRING - -Return type map: - -DOUBLE -> TIMESTAMP - -DOUBLE, STRING -> STRING - -Examples:: - - os> source=people | eval `FROM_UNIXTIME(1220249547)` = FROM_UNIXTIME(1220249547) | fields `FROM_UNIXTIME(1220249547)` - fetched rows / total rows = 1/1 - +---------------------------+ - | FROM_UNIXTIME(1220249547) | - |---------------------------| - | 2008-09-01 06:12:27 | - +---------------------------+ - - os> source=people | eval `FROM_UNIXTIME(1220249547, '%T')` = FROM_UNIXTIME(1220249547, '%T') | fields `FROM_UNIXTIME(1220249547, '%T')` - fetched rows / total rows = 1/1 - +---------------------------------+ - | FROM_UNIXTIME(1220249547, '%T') | - |---------------------------------| - | 06:12:27 | - +---------------------------------+ - - -GET_FORMAT ----------- - -Description ->>>>>>>>>>> - -Usage: Returns a string value containing string format specifiers based on the input arguments. - -Argument type: TYPE, STRING, where TYPE must be one of the following tokens: [DATE, TIME, TIMESTAMP], and -STRING must be one of the following tokens: ["USA", "JIS", "ISO", "EUR", "INTERNAL"] (" can be replaced by '). - -Examples:: - - os> source=people | eval `GET_FORMAT(DATE, 'USA')` = GET_FORMAT(DATE, 'USA') | fields `GET_FORMAT(DATE, 'USA')` - fetched rows / total rows = 1/1 - +-------------------------+ - | GET_FORMAT(DATE, 'USA') | - |-------------------------| - | %m.%d.%Y | - +-------------------------+ - - -HOUR ----- - -Description ->>>>>>>>>>> - -Usage: hour(time) extracts the hour value for time. Different from the time of day value, the time value has a large range and can be greater than 23, so the return value of hour(time) can be also greater than 23. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Synonyms: `HOUR_OF_DAY`_ - -Example:: - - os> source=people | eval `HOUR(TIME('01:02:03'))` = HOUR(TIME('01:02:03')) | fields `HOUR(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +------------------------+ - | HOUR(TIME('01:02:03')) | - |------------------------| - | 1 | - +------------------------+ - - -HOUR_OF_DAY ------------ - -Description ->>>>>>>>>>> - -Usage: hour_of_day(time) extracts the hour value for time. Different from the time of day value, the time value has a large range and can be greater than 23, so the return value of hour_of_day(time) can be also greater than 23. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Synonyms: `HOUR`_ - -Example:: - - os> source=people | eval `HOUR_OF_DAY(TIME('01:02:03'))` = HOUR_OF_DAY(TIME('01:02:03')) | fields `HOUR_OF_DAY(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +-------------------------------+ - | HOUR_OF_DAY(TIME('01:02:03')) | - |-------------------------------| - | 1 | - +-------------------------------+ - - -LAST_DAY --------- - -Usage: Returns the last day of the month as a DATE for a valid argument. - -Argument type: DATE/STRING/TIMESTAMP/TIME - -Return type: DATE - -Example:: - - os> source=people | eval `last_day('2023-02-06')` = last_day('2023-02-06') | fields `last_day('2023-02-06')` - fetched rows / total rows = 1/1 - +------------------------+ - | last_day('2023-02-06') | - |------------------------| - | 2023-02-28 | - +------------------------+ - - -LOCALTIMESTAMP --------------- - -Description ->>>>>>>>>>> - -`LOCALTIMESTAMP()` are synonyms for `NOW() <#now>`_. 
- -Example:: - - > source=people | eval `LOCALTIMESTAMP()` = LOCALTIMESTAMP() | fields `LOCALTIMESTAMP()` - fetched rows / total rows = 1/1 - +---------------------+ - | LOCALTIMESTAMP() | - |---------------------+ - | 2022-08-02 15:54:19 | - +---------------------+ - - -LOCALTIME ---------- - -Description ->>>>>>>>>>> - -`LOCALTIME()` are synonyms for `NOW() <#now>`_. - -Example:: - - > source=people | eval `LOCALTIME()` = LOCALTIME() | fields `LOCALTIME()` - fetched rows / total rows = 1/1 - +---------------------+ - | LOCALTIME() | - |---------------------+ - | 2022-08-02 15:54:19 | - +---------------------+ - - -MAKEDATE --------- - -Description ->>>>>>>>>>> - -Returns a date, given `year` and `day-of-year` values. `dayofyear` must be greater than 0 or the result is `NULL`. The result is also `NULL` if either argument is `NULL`. -Arguments are rounded to an integer. - -Limitations: -- Zero `year` interpreted as 2000; -- Negative `year` is not accepted; -- `day-of-year` should be greater than zero; -- `day-of-year` could be greater than 365/366, calculation switches to the next year(s) (see example). - -Specifications: - -1. MAKEDATE(DOUBLE, DOUBLE) -> DATE - -Argument type: DOUBLE - -Return type: DATE - -Example:: - - os> source=people | eval `MAKEDATE(1945, 5.9)` = MAKEDATE(1945, 5.9), `MAKEDATE(1984, 1984)` = MAKEDATE(1984, 1984) | fields `MAKEDATE(1945, 5.9)`, `MAKEDATE(1984, 1984)` - fetched rows / total rows = 1/1 - +---------------------+----------------------+ - | MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) | - |---------------------+----------------------| - | 1945-01-06 | 1989-06-06 | - +---------------------+----------------------+ - - -MAKETIME --------- - -Description ->>>>>>>>>>> - -Returns a time value calculated from the hour, minute, and second arguments. Returns `NULL` if any of its arguments are `NULL`. -The second argument can have a fractional part, rest arguments are rounded to an integer. - -Limitations: -- 24-hour clock is used, available time range is [00:00:00.0 - 23:59:59.(9)]; -- Up to 9 digits of second fraction part is taken (nanosecond precision). - -Specifications: - -1. MAKETIME(DOUBLE, DOUBLE, DOUBLE) -> TIME - -Argument type: DOUBLE - -Return type: TIME - -Example:: - - os> source=people | eval `MAKETIME(20, 30, 40)` = MAKETIME(20, 30, 40), `MAKETIME(20.2, 49.5, 42.100502)` = MAKETIME(20.2, 49.5, 42.100502) | fields `MAKETIME(20, 30, 40)`, `MAKETIME(20.2, 49.5, 42.100502)` - fetched rows / total rows = 1/1 - +----------------------+---------------------------------+ - | MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) | - |----------------------+---------------------------------| - | 20:30:40 | 20:50:42.100502 | - +----------------------+---------------------------------+ - - -MICROSECOND ------------ - -Description ->>>>>>>>>>> - -Usage: microsecond(expr) returns the microseconds from the time or timestamp expression expr as a number in the range from 0 to 999999. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Example:: - - os> source=people | eval `MICROSECOND(TIME('01:02:03.123456'))` = MICROSECOND(TIME('01:02:03.123456')) | fields `MICROSECOND(TIME('01:02:03.123456'))` - fetched rows / total rows = 1/1 - +--------------------------------------+ - | MICROSECOND(TIME('01:02:03.123456')) | - |--------------------------------------| - | 123456 | - +--------------------------------------+ - - -MINUTE ------- - -Description ->>>>>>>>>>> - -Usage: minute(time) returns the minute for time, in the range 0 to 59. 
- -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Synonyms: `MINUTE_OF_HOUR`_ - -Example:: - - os> source=people | eval `MINUTE(TIME('01:02:03'))` = MINUTE(TIME('01:02:03')) | fields `MINUTE(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +--------------------------+ - | MINUTE(TIME('01:02:03')) | - |--------------------------| - | 2 | - +--------------------------+ - - -MINUTE_OF_DAY -------------- - -Description ->>>>>>>>>>> - -Usage: minute(time) returns the amount of minutes in the day, in the range of 0 to 1439. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Example:: - - os> source=people | eval `MINUTE_OF_DAY(TIME('01:02:03'))` = MINUTE_OF_DAY(TIME('01:02:03')) | fields `MINUTE_OF_DAY(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +---------------------------------+ - | MINUTE_OF_DAY(TIME('01:02:03')) | - |---------------------------------| - | 62 | - +---------------------------------+ - - -MINUTE_OF_HOUR --------------- - -Description ->>>>>>>>>>> - -Usage: minute(time) returns the minute for time, in the range 0 to 59. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Synonyms: `MINUTE`_ - -Example:: - - os> source=people | eval `MINUTE_OF_HOUR(TIME('01:02:03'))` = MINUTE_OF_HOUR(TIME('01:02:03')) | fields `MINUTE_OF_HOUR(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +----------------------------------+ - | MINUTE_OF_HOUR(TIME('01:02:03')) | - |----------------------------------| - | 2 | - +----------------------------------+ - - -MONTH ------ - -Description ->>>>>>>>>>> - -Usage: month(date) returns the month for date, in the range 1 to 12 for January to December. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `MONTH_OF_YEAR`_ - -Example:: - - os> source=people | eval `MONTH(DATE('2020-08-26'))` = MONTH(DATE('2020-08-26')) | fields `MONTH(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +---------------------------+ - | MONTH(DATE('2020-08-26')) | - |---------------------------| - | 8 | - +---------------------------+ - - -MONTH_OF_YEAR -------------- - -Description ->>>>>>>>>>> - -Usage: month_of_year(date) returns the month for date, in the range 1 to 12 for January to December. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Synonyms: `MONTH`_ - -Example:: - - os> source=people | eval `MONTH_OF_YEAR(DATE('2020-08-26'))` = MONTH_OF_YEAR(DATE('2020-08-26')) | fields `MONTH_OF_YEAR(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-----------------------------------+ - | MONTH_OF_YEAR(DATE('2020-08-26')) | - |-----------------------------------| - | 8 | - +-----------------------------------+ - - -MONTHNAME ---------- - -Description ->>>>>>>>>>> - -Usage: monthname(date) returns the full name of the month for date. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: STRING - -Example:: - - os> source=people | eval `MONTHNAME(DATE('2020-08-26'))` = MONTHNAME(DATE('2020-08-26')) | fields `MONTHNAME(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-------------------------------+ - | MONTHNAME(DATE('2020-08-26')) | - |-------------------------------| - | August | - +-------------------------------+ - - -NOW ---- - -Description ->>>>>>>>>>> - -Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss' format. The value is expressed in the UTC time zone. -`NOW()` returns a constant time that indicates the time at which the statement began to execute. 
This differs from the behavior for `SYSDATE() <#sysdate>`_, which returns the exact time at which it executes. - -Return type: TIMESTAMP - -Specification: NOW() -> TIMESTAMP - -Example:: - - > source=people | eval `value_1` = NOW(), `value_2` = NOW() | fields `value_1`, `value_2` - fetched rows / total rows = 1/1 - +---------------------+---------------------+ - | value_1 | value_2 | - |---------------------+---------------------| - | 2022-08-02 15:39:05 | 2022-08-02 15:39:05 | - +---------------------+---------------------+ - - -PERIOD_ADD ----------- - -Description ->>>>>>>>>>> - -Usage: period_add(P, N) add N months to period P (in the format YYMM or YYYYMM). Returns a value in the format YYYYMM. - -Argument type: INTEGER, INTEGER - -Return type: INTEGER - -Example:: - - os> source=people | eval `PERIOD_ADD(200801, 2)` = PERIOD_ADD(200801, 2), `PERIOD_ADD(200801, -12)` = PERIOD_ADD(200801, -12) | fields `PERIOD_ADD(200801, 2)`, `PERIOD_ADD(200801, -12)` - fetched rows / total rows = 1/1 - +-----------------------+-------------------------+ - | PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) | - |-----------------------+-------------------------| - | 200803 | 200701 | - +-----------------------+-------------------------+ - - -PERIOD_DIFF ------------ - -Description ->>>>>>>>>>> - -Usage: period_diff(P1, P2) returns the number of months between periods P1 and P2 given in the format YYMM or YYYYMM. - -Argument type: INTEGER, INTEGER - -Return type: INTEGER - -Example:: - - os> source=people | eval `PERIOD_DIFF(200802, 200703)` = PERIOD_DIFF(200802, 200703), `PERIOD_DIFF(200802, 201003)` = PERIOD_DIFF(200802, 201003) | fields `PERIOD_DIFF(200802, 200703)`, `PERIOD_DIFF(200802, 201003)` - fetched rows / total rows = 1/1 - +-----------------------------+-----------------------------+ - | PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) | - |-----------------------------+-----------------------------| - | 11 | -25 | - +-----------------------------+-----------------------------+ - - -QUARTER -------- - -Description ->>>>>>>>>>> - -Usage: quarter(date) returns the quarter of the year for date, in the range 1 to 4. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Example:: - - os> source=people | eval `QUARTER(DATE('2020-08-26'))` = QUARTER(DATE('2020-08-26')) | fields `QUARTER(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +-----------------------------+ - | QUARTER(DATE('2020-08-26')) | - |-----------------------------| - | 3 | - +-----------------------------+ - - -SEC_TO_TIME ------------ - -Description ->>>>>>>>>>> - -Usage: sec_to_time(number) returns the time in HH:mm:ssss[.nnnnnn] format. -Note that the function returns a time between 00:00:00 and 23:59:59. -If an input value is too large (greater than 86399), the function will wrap around and begin returning outputs starting from 00:00:00. -If an input value is too small (less than 0), the function will wrap around and begin returning outputs counting down from 23:59:59. 
- -Argument type: INTEGER, LONG, DOUBLE, FLOAT - -Return type: TIME - -Example:: - - os> source=people | eval `SEC_TO_TIME(3601)` = SEC_TO_TIME(3601) | eval `SEC_TO_TIME(1234.123)` = SEC_TO_TIME(1234.123) | fields `SEC_TO_TIME(3601)`, `SEC_TO_TIME(1234.123)` - fetched rows / total rows = 1/1 - +-------------------+-----------------------+ - | SEC_TO_TIME(3601) | SEC_TO_TIME(1234.123) | - |-------------------+-----------------------| - | 01:00:01 | 00:20:34.123 | - +-------------------+-----------------------+ - - -SECOND ------- - -Description ->>>>>>>>>>> - -Usage: second(time) returns the second for time, in the range 0 to 59. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Synonyms: `SECOND_OF_MINUTE`_ - -Example:: - - os> source=people | eval `SECOND(TIME('01:02:03'))` = SECOND(TIME('01:02:03')) | fields `SECOND(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +--------------------------+ - | SECOND(TIME('01:02:03')) | - |--------------------------| - | 3 | - +--------------------------+ - - -SECOND_OF_MINUTE ----------------- - -Description ->>>>>>>>>>> - -Usage: second_of_minute(time) returns the second for time, in the range 0 to 59. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: INTEGER - -Synonyms: `SECOND`_ - -Example:: - - os> source=people | eval `SECOND_OF_MINUTE(TIME('01:02:03'))` = SECOND_OF_MINUTE(TIME('01:02:03')) | fields `SECOND_OF_MINUTE(TIME('01:02:03'))` - fetched rows / total rows = 1/1 - +------------------------------------+ - | SECOND_OF_MINUTE(TIME('01:02:03')) | - |------------------------------------| - | 3 | - +------------------------------------+ - - -STRFTIME --------- - -**Version: 3.3.0** - -Description ->>>>>>>>>>> - -Usage: strftime(time, format) takes a UNIX timestamp (in seconds) and renders it as a string using the format specified. For numeric inputs, the UNIX time must be in seconds. Values greater than 100000000000 are automatically treated as milliseconds and converted to seconds. -You can use time format variables with the strftime function. This function performs the reverse operation of `UNIX_TIMESTAMP`_ and is similar to `FROM_UNIXTIME`_ but with POSIX-style format specifiers. - -.. note:: - - **Available only when Calcite engine is enabled** - - All timestamps are interpreted as UTC timezone - - Text formatting uses language-neutral Locale.ROOT (weekday and month names appear in abbreviated form) - - String inputs are NOT supported - use `unix_timestamp()` to convert strings first - - Functions that return date/time values (like `date()`, `now()`, `timestamp()`) are supported - -Argument type: INTEGER/LONG/DOUBLE/TIMESTAMP, STRING - -Return type: STRING - -Format specifiers: - -.. list-table:: The following table describes the available specifier arguments. 
- :widths: 20 80 - :header-rows: 1 - - * - Specifier - - Description - * - %a - - Abbreviated weekday name (Mon..Sun) - * - %A - - Weekday name (Mon..Sun) - Note: Locale.ROOT uses abbreviated form - * - %b - - Abbreviated month name (Jan..Dec) - * - %B - - Month name (Jan..Dec) - Note: Locale.ROOT uses abbreviated form - * - %c - - Date and time (e.g., Mon Jul 18 09:30:00 2019) - * - %C - - Century as 2-digit decimal number - * - %d - - Day of the month, zero-padded (01..31) - * - %e - - Day of the month, space-padded ( 1..31) - * - %Ez - - Timezone offset in minutes from UTC (e.g., +0 for UTC, +330 for IST, -300 for EST) - * - %f - - Microseconds as decimal number (000000..999999) - * - %F - - ISO 8601 date format (%Y-%m-%d) - * - %g - - ISO 8601 year without century (00..99) - * - %G - - ISO 8601 year with century - * - %H - - Hour (24-hour clock) (00..23) - * - %I - - Hour (12-hour clock) (01..12) - * - %j - - Day of year (001..366) - * - %k - - Hour (24-hour clock), space-padded ( 0..23) - * - %m - - Month as decimal number (01..12) - * - %M - - Minute (00..59) - * - %N - - Subsecond digits (default %9N = nanoseconds). Accepts any precision value from 1-9 (e.g., %3N = 3 digits, %5N = 5 digits, %9N = 9 digits). The precision directly controls the number of digits displayed - * - %p - - AM or PM - * - %Q - - Subsecond component (default milliseconds). Can specify precision: %3Q = milliseconds, %6Q = microseconds, %9Q = nanoseconds. Other precision values (e.g., %5Q) default to %3Q - * - %s - - UNIX Epoch timestamp in seconds - * - %S - - Second (00..59) - * - %T - - Time in 24-hour notation (%H:%M:%S) - * - %U - - Week of year starting from 0 (00..53) - * - %V - - ISO week number (01..53) - * - %w - - Weekday as decimal (0=Sunday..6=Saturday) - * - %x - - Date in MM/dd/yyyy format (e.g., 07/13/2019) - * - %X - - Time in HH:mm:ss format (e.g., 09:30:00) - * - %y - - Year without century (00..99) - * - %Y - - Year with century - * - %z - - Timezone offset (+hhmm or -hhmm) - * - %:z - - Timezone offset with colon (+hh:mm or -hh:mm) - * - %::z - - Timezone offset with colons (+hh:mm:ss) - * - %:::z - - Timezone offset hour only (+hh or -hh) - * - %Z - - Timezone abbreviation (e.g., EST, PDT) - * - %% - - Literal % character - -Examples:: - - #os> source=people | eval `strftime(1521467703, "%Y-%m-%dT%H:%M:%S")` = strftime(1521467703, "%Y-%m-%dT%H:%M:%S") | fields `strftime(1521467703, "%Y-%m-%dT%H:%M:%S")` - fetched rows / total rows = 1/1 - +-------------------------------------------+ - | strftime(1521467703, "%Y-%m-%dT%H:%M:%S") | - |-------------------------------------------| - | 2018-03-19T13:55:03 | - +-------------------------------------------+ - - #os> source=people | eval `strftime(1521467703, "%F %T")` = strftime(1521467703, "%F %T") | fields `strftime(1521467703, "%F %T")` - fetched rows / total rows = 1/1 - +-------------------------------+ - | strftime(1521467703, "%F %T") | - |-------------------------------| - | 2018-03-19 13:55:03 | - +-------------------------------+ - - #os> source=people | eval `strftime(1521467703, "%a %b %d, %Y")` = strftime(1521467703, "%a %b %d, %Y") | fields `strftime(1521467703, "%a %b %d, %Y")` - fetched rows / total rows = 1/1 - +--------------------------------------+ - | strftime(1521467703, "%a %b %d, %Y") | - |--------------------------------------| - | Mon Mar 19, 2018 | - +--------------------------------------+ - - #os> source=people | eval `strftime(1521467703, "%%Y")` = strftime(1521467703, "%%Y") | fields `strftime(1521467703, "%%Y")` - 
fetched rows / total rows = 1/1 - +---------------------------+ - | strftime(1521467703, "%%Y") | - |---------------------------| - | %Y | - +---------------------------+ - - #os> source=people | eval `strftime(date('2020-09-16'), "%Y-%m-%d")` = strftime(date('2020-09-16'), "%Y-%m-%d") | fields `strftime(date('2020-09-16'), "%Y-%m-%d")` - fetched rows / total rows = 1/1 - +----------------------------------------+ - | strftime(date('2020-09-16'), "%Y-%m-%d") | - |-----------------------------------------| - | 2020-09-16 | - +----------------------------------------+ - - #os> source=people | eval `strftime(timestamp('2020-09-16 14:30:00'), "%F %T")` = strftime(timestamp('2020-09-16 14:30:00'), "%F %T") | fields `strftime(timestamp('2020-09-16 14:30:00'), "%F %T")` - fetched rows / total rows = 1/1 - +--------------------------------------------------+ - | strftime(timestamp('2020-09-16 14:30:00'), "%F %T") | - |---------------------------------------------------| - | 2020-09-16 14:30:00 | - +--------------------------------------------------+ - - #os> source=people | eval `strftime(now(), "%Y-%m-%d %H:%M:%S")` = strftime(now(), "%Y-%m-%d %H:%M:%S") | fields `strftime(now(), "%Y-%m-%d %H:%M:%S")` - fetched rows / total rows = 1/1 - +------------------------------------+ - | strftime(now(), "%Y-%m-%d %H:%M:%S") | - |-------------------------------------| - | 2025-09-03 12:30:45 | - +------------------------------------+ - - -STR_TO_DATE ------------ - -Description ->>>>>>>>>>> - -Usage: str_to_date(string, string) is used to extract a TIMESTAMP from the first argument string using the formats specified in the second argument string. -The input argument must have enough information to be parsed as a DATE, TIMESTAMP, or TIME. -Acceptable string format specifiers are the same as those used in the `DATE_FORMAT`_ function. -It returns NULL when a statement cannot be parsed due to an invalid pair of arguments, and when 0 is provided for any DATE field. Otherwise, it will return a TIMESTAMP with the parsed values (as well as default values for any field that was not parsed). - -Argument type: STRING, STRING - -Return type: TIMESTAMP - -Example:: - - OS> source=people | eval `str_to_date("01,5,2013", "%d,%m,%Y")` = str_to_date("01,5,2013", "%d,%m,%Y") | fields = `str_to_date("01,5,2013", "%d,%m,%Y")` - fetched rows / total rows = 1/1 - +--------------------------------------+ - | str_to_date("01,5,2013", "%d,%m,%Y") | - |--------------------------------------| - | 2013-05-01 00:00:00 | - +--------------------------------------+ - - -SUBDATE -------- - -Description ->>>>>>>>>>> - -Usage: subdate(date, INTERVAL expr unit) / subdate(date, days) subtracts the interval expr from date; subdate(date, days) subtracts the second argument as integer number of days from date. -If first argument is TIME, today's date is used; if first argument is DATE, time at midnight is used. - -Argument type: DATE/TIMESTAMP/TIME, INTERVAL/LONG - -Return type map: - -(DATE/TIMESTAMP/TIME, INTERVAL) -> TIMESTAMP - -(DATE, LONG) -> DATE - -(TIMESTAMP/TIME, LONG) -> TIMESTAMP - -Synonyms: `DATE_SUB`_ when invoked with the INTERVAL form of the second argument. 
- -Antonyms: `ADDDATE`_ - -Example:: - - os> source=people | eval `'2008-01-02' - 31d` = SUBDATE(DATE('2008-01-02'), INTERVAL 31 DAY), `'2020-08-26' - 1` = SUBDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' - 1` = SUBDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2008-01-02' - 31d`, `'2020-08-26' - 1`, `ts '2020-08-26 01:01:01' - 1` - fetched rows / total rows = 1/1 - +---------------------+------------------+------------------------------+ - | '2008-01-02' - 31d | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 | - |---------------------+------------------+------------------------------| - | 2007-12-02 00:00:00 | 2020-08-25 | 2020-08-25 01:01:01 | - +---------------------+------------------+------------------------------+ - - -SUBTIME -------- - -Description ->>>>>>>>>>> - -Usage: subtime(expr1, expr2) subtracts expr2 from expr1 and returns the result. If argument is TIME, today's date is used; if argument is DATE, time at midnight is used. - -Argument type: DATE/TIMESTAMP/TIME, DATE/TIMESTAMP/TIME - -Return type map: - -(DATE/TIMESTAMP, DATE/TIMESTAMP/TIME) -> TIMESTAMP - -(TIME, DATE/TIMESTAMP/TIME) -> TIME - -Antonyms: `ADDTIME`_ - -Example:: - - os> source=people | eval `'2008-12-12' - 0` = SUBTIME(DATE('2008-12-12'), DATE('2008-11-15')) | fields `'2008-12-12' - 0` - fetched rows / total rows = 1/1 - +---------------------+ - | '2008-12-12' - 0 | - |---------------------| - | 2008-12-12 00:00:00 | - +---------------------+ - - os> source=people | eval `'23:59:59' - 0` = SUBTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' - 0` - fetched rows / total rows = 1/1 - +----------------+ - | '23:59:59' - 0 | - |----------------| - | 23:59:59 | - +----------------+ - - os> source=people | eval `'2004-01-01' - '23:59:59'` = SUBTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' - '23:59:59'` - fetched rows / total rows = 1/1 - +---------------------------+ - | '2004-01-01' - '23:59:59' | - |---------------------------| - | 2003-12-31 00:00:01 | - +---------------------------+ - - os> source=people | eval `'10:20:30' - '00:05:42'` = SUBTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' - '00:05:42'` - fetched rows / total rows = 1/1 - +-------------------------+ - | '10:20:30' - '00:05:42' | - |-------------------------| - | 10:14:48 | - +-------------------------+ - - os> source=people | eval `'2007-03-01 10:20:30' - '20:40:50'` = SUBTIME(TIMESTAMP('2007-03-01 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-03-01 10:20:30' - '20:40:50'` - fetched rows / total rows = 1/1 - +------------------------------------+ - | '2007-03-01 10:20:30' - '20:40:50' | - |------------------------------------| - | 2007-02-28 13:39:40 | - +------------------------------------+ - - -SYSDATE -------- - -Description ->>>>>>>>>>> - -Returns the current date and time as a value in 'YYYY-MM-DD hh:mm:ss[.nnnnnn]'. -SYSDATE() returns the date and time at which it executes in UTC. This differs from the behavior for `NOW() <#now>`_, which returns a constant time that indicates the time at which the statement began to execute. -If an argument is given, it specifies a fractional seconds precision from 0 to 6, the return value includes a fractional seconds part of that many digits. 
- -Optional argument type: INTEGER - -Return type: TIMESTAMP - -Specification: SYSDATE([INTEGER]) -> TIMESTAMP - -Example:: - - > source=people | eval `value_1` = SYSDATE(), `value_2` = SYSDATE(6) | fields `value_1`, `value_2` - fetched rows / total rows = 1/1 - +---------------------+----------------------------+ - | value_1 | value_2 | - |---------------------+----------------------------| - | 2022-08-02 15:39:05 | 2022-08-02 15:39:05.123456 | - +---------------------+----------------------------+ - - -TIME ----- - -Description ->>>>>>>>>>> - -Usage: time(expr) constructs a time type with the input string expr as a time. If the argument is of date/time/timestamp, it extracts the time value part from the expression. - -Argument type: STRING/DATE/TIME/TIMESTAMP - -Return type: TIME - -Example:: - - os> source=people | eval `TIME('13:49:00')` = TIME('13:49:00') | fields `TIME('13:49:00')` - fetched rows / total rows = 1/1 - +------------------+ - | TIME('13:49:00') | - |------------------| - | 13:49:00 | - +------------------+ - - os> source=people | eval `TIME('13:49')` = TIME('13:49') | fields `TIME('13:49')` - fetched rows / total rows = 1/1 - +---------------+ - | TIME('13:49') | - |---------------| - | 13:49:00 | - +---------------+ - - os> source=people | eval `TIME('2020-08-26 13:49:00')` = TIME('2020-08-26 13:49:00') | fields `TIME('2020-08-26 13:49:00')` - fetched rows / total rows = 1/1 - +-----------------------------+ - | TIME('2020-08-26 13:49:00') | - |-----------------------------| - | 13:49:00 | - +-----------------------------+ - - os> source=people | eval `TIME('2020-08-26 13:49')` = TIME('2020-08-26 13:49') | fields `TIME('2020-08-26 13:49')` - fetched rows / total rows = 1/1 - +--------------------------+ - | TIME('2020-08-26 13:49') | - |--------------------------| - | 13:49:00 | - +--------------------------+ - - -TIME_FORMAT ------------ - -Description ->>>>>>>>>>> - -Usage: time_format(time, format) formats the time argument using the specifiers in the format argument. -This supports a subset of the time format specifiers available for the `date_format`_ function. -Using date format specifiers supported by `date_format`_ will return 0 or null. -Acceptable format specifiers are listed in the table below. -If an argument of type DATE is passed in, it is treated as a TIMESTAMP at midnight (i.e., 00:00:00). - -.. list-table:: The following table describes the available specifier arguments. 
- :widths: 20 80 - :header-rows: 1 - - * - Specifier - - Description - * - %f - - Microseconds (000000..999999) - * - %H - - Hour (00..23) - * - %h - - Hour (01..12) - * - %I - - Hour (01..12) - * - %i - - Minutes, numeric (00..59) - * - %p - - AM or PM - * - %r - - Time, 12-hour (hh:mm:ss followed by AM or PM) - * - %S - - Seconds (00..59) - * - %s - - Seconds (00..59) - * - %T - - Time, 24-hour (hh:mm:ss) - - -Argument type: STRING/DATE/TIME/TIMESTAMP, STRING - -Return type: STRING - -Example:: - - os> source=people | eval `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` = TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | fields `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` - fetched rows / total rows = 1/1 - +----------------------------------------------------------------------------+ - | TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | - |----------------------------------------------------------------------------| - | 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15 | - +----------------------------------------------------------------------------+ - - -TIME_TO_SEC ------------ - -Description ->>>>>>>>>>> - -Usage: time_to_sec(time) returns the time argument, converted to seconds. - -Argument type: STRING/TIME/TIMESTAMP - -Return type: LONG - -Example:: - - os> source=people | eval `TIME_TO_SEC(TIME('22:23:00'))` = TIME_TO_SEC(TIME('22:23:00')) | fields `TIME_TO_SEC(TIME('22:23:00'))` - fetched rows / total rows = 1/1 - +-------------------------------+ - | TIME_TO_SEC(TIME('22:23:00')) | - |-------------------------------| - | 80580 | - +-------------------------------+ - - -TIMEDIFF --------- - -Description ->>>>>>>>>>> - -Usage: returns the difference between two time expressions as a time. - -Argument type: TIME, TIME - -Return type: TIME - -Example:: - - os> source=people | eval `TIMEDIFF('23:59:59', '13:00:00')` = TIMEDIFF('23:59:59', '13:00:00') | fields `TIMEDIFF('23:59:59', '13:00:00')` - fetched rows / total rows = 1/1 - +----------------------------------+ - | TIMEDIFF('23:59:59', '13:00:00') | - |----------------------------------| - | 10:59:59 | - +----------------------------------+ - - -TIMESTAMP ---------- - -Description ->>>>>>>>>>> - -Usage: timestamp(expr) constructs a timestamp type with the input string `expr` as an timestamp. If the argument is not a string, it casts `expr` to timestamp type with default timezone UTC. If argument is a time, it applies today's date before cast. -With two arguments `timestamp(expr1, expr2)` adds the time expression `expr2` to the date or timestamp expression `expr1` and returns the result as a timestamp value. 
- -Argument type: STRING/DATE/TIME/TIMESTAMP - -Return type map: - -(STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP - -(STRING/DATE/TIME/TIMESTAMP, STRING/DATE/TIME/TIMESTAMP) -> TIMESTAMP - -Example:: - - os> source=people | eval `TIMESTAMP('2020-08-26 13:49:00')` = TIMESTAMP('2020-08-26 13:49:00'), `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))` = TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | fields `TIMESTAMP('2020-08-26 13:49:00')`, `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))` - fetched rows / total rows = 1/1 - +----------------------------------+----------------------------------------------------+ - | TIMESTAMP('2020-08-26 13:49:00') | TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | - |----------------------------------+----------------------------------------------------| - | 2020-08-26 13:49:00 | 2020-08-27 02:04:42 | - +----------------------------------+----------------------------------------------------+ - - -TIMESTAMPADD ------------- - -Description ->>>>>>>>>>> - -Usage: Returns a TIMESTAMP value based on a passed in DATE/TIME/TIMESTAMP/STRING argument and an INTERVAL and INTEGER argument which determine the amount of time to be added. -If the third argument is a STRING, it must be formatted as a valid TIMESTAMP. If only a TIME is provided, a TIMESTAMP is still returned with the DATE portion filled in using the current date. -If the third argument is a DATE, it will be automatically converted to a TIMESTAMP. - -Argument type: INTERVAL, INTEGER, DATE/TIME/TIMESTAMP/STRING - -INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] - -Examples:: - - os> source=people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` - fetched rows / total rows = 1/1 - +----------------------------------------------+--------------------------------------------------+ - | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | - |----------------------------------------------+--------------------------------------------------| - | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | - +----------------------------------------------+--------------------------------------------------+ - - -TIMESTAMPDIFF -------------- - -Description ->>>>>>>>>>> - -Usage: TIMESTAMPDIFF(interval, start, end) returns the difference between the start and end date/times in interval units. -If a TIME is provided as an argument, it will be converted to a TIMESTAMP with the DATE portion filled in using the current date. -Arguments will be automatically converted to a TIME/TIMESTAMP when appropriate. -Any argument that is a STRING must be formatted as a valid TIMESTAMP. 
- -Argument type: INTERVAL, DATE/TIME/TIMESTAMP/STRING, DATE/TIME/TIMESTAMP/STRING - -INTERVAL must be one of the following tokens: [MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, YEAR] - -Examples:: - - os> source=people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` - fetched rows / total rows = 1/1 - +-------------------------------------------------------------------+-----------------------------------------------------------+ - | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | - |-------------------------------------------------------------------+-----------------------------------------------------------| - | 4 | -23 | - +-------------------------------------------------------------------+-----------------------------------------------------------+ - - -TO_DAYS -------- - -Description ->>>>>>>>>>> - -Usage: to_days(date) returns the day number (the number of days since year 0) of the given date. Returns NULL if date is invalid. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: LONG - -Example:: - - os> source=people | eval `TO_DAYS(DATE('2008-10-07'))` = TO_DAYS(DATE('2008-10-07')) | fields `TO_DAYS(DATE('2008-10-07'))` - fetched rows / total rows = 1/1 - +-----------------------------+ - | TO_DAYS(DATE('2008-10-07')) | - |-----------------------------| - | 733687 | - +-----------------------------+ - - -TO_SECONDS ----------- - -Description ->>>>>>>>>>> - -Usage: to_seconds(date) returns the number of seconds since the year 0 of the given value. Returns NULL if value is invalid. -An argument of a LONG type can be used. It must be formatted as YMMDD, YYMMDD, YYYMMDD or YYYYMMDD. Note that a LONG type argument cannot have leading 0s as it will be parsed using an octal numbering system. - -Argument type: STRING/LONG/DATE/TIME/TIMESTAMP - -Return type: LONG - -Example:: - - os> source=people | eval `TO_SECONDS(DATE('2008-10-07'))` = TO_SECONDS(DATE('2008-10-07')) | eval `TO_SECONDS(950228)` = TO_SECONDS(950228) | fields `TO_SECONDS(DATE('2008-10-07'))`, `TO_SECONDS(950228)` - fetched rows / total rows = 1/1 - +--------------------------------+--------------------+ - | TO_SECONDS(DATE('2008-10-07')) | TO_SECONDS(950228) | - |--------------------------------+--------------------| - | 63390556800 | 62961148800 | - +--------------------------------+--------------------+ - - -UNIX_TIMESTAMP --------------- - -Description ->>>>>>>>>>> - -Usage: Converts given argument to Unix time (seconds since Epoch - very beginning of year 1970). If no argument given, it returns the current Unix time. -The date argument may be a DATE, or TIMESTAMP string, or a number in YYMMDD, YYMMDDhhmmss, YYYYMMDD, or YYYYMMDDhhmmss format. If the argument includes a time part, it may optionally include a fractional seconds part. -If argument is in invalid format or outside of range 1970-01-01 00:00:00 - 3001-01-18 23:59:59.999999 (0 to 32536771199.999999 epoch time), function returns NULL. -You can use `FROM_UNIXTIME`_ to do reverse conversion. 
- -Argument type: /DOUBLE/DATE/TIMESTAMP - -Return type: DOUBLE - -Example:: - - os> source=people | eval `UNIX_TIMESTAMP(double)` = UNIX_TIMESTAMP(20771122143845), `UNIX_TIMESTAMP(timestamp)` = UNIX_TIMESTAMP(TIMESTAMP('1996-11-15 17:05:42')) | fields `UNIX_TIMESTAMP(double)`, `UNIX_TIMESTAMP(timestamp)` - fetched rows / total rows = 1/1 - +------------------------+---------------------------+ - | UNIX_TIMESTAMP(double) | UNIX_TIMESTAMP(timestamp) | - |------------------------+---------------------------| - | 3404817525.0 | 848077542.0 | - +------------------------+---------------------------+ - - -UTC_DATE --------- - -Description ->>>>>>>>>>> - -Returns the current UTC date as a value in 'YYYY-MM-DD'. - -Return type: DATE - -Specification: UTC_DATE() -> DATE - -Example:: - - > source=people | eval `UTC_DATE()` = UTC_DATE() | fields `UTC_DATE()` - fetched rows / total rows = 1/1 - +------------+ - | UTC_DATE() | - |------------| - | 2022-10-03 | - +------------+ - - -UTC_TIME --------- - -Description ->>>>>>>>>>> - -Returns the current UTC time as a value in 'hh:mm:ss'. - -Return type: TIME - -Specification: UTC_TIME() -> TIME - -Example:: - - > source=people | eval `UTC_TIME()` = UTC_TIME() | fields `UTC_TIME()` - fetched rows / total rows = 1/1 - +------------+ - | UTC_TIME() | - |------------| - | 17:54:27 | - +------------+ - - -UTC_TIMESTAMP -------------- - -Description ->>>>>>>>>>> - -Returns the current UTC timestamp as a value in 'YYYY-MM-DD hh:mm:ss'. - -Return type: TIMESTAMP - -Specification: UTC_TIMESTAMP() -> TIMESTAMP - -Example:: - - > source=people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()` - fetched rows / total rows = 1/1 - +---------------------+ - | UTC_TIMESTAMP() | - |---------------------| - | 2022-10-03 17:54:28 | - +---------------------+ - - -WEEK ----- - -Description ->>>>>>>>>>> - -Usage: week(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used. - -.. list-table:: The following table describes how the mode argument works. - :widths: 25 50 25 75 - :header-rows: 1 - - * - Mode - - First day of week - - Range - - Week 1 is the first week ... - * - 0 - - Sunday - - 0-53 - - with a Sunday in this year - * - 1 - - Monday - - 0-53 - - with 4 or more days this year - * - 2 - - Sunday - - 1-53 - - with a Sunday in this year - * - 3 - - Monday - - 1-53 - - with 4 or more days this year - * - 4 - - Sunday - - 0-53 - - with 4 or more days this year - * - 5 - - Monday - - 0-53 - - with a Monday in this year - * - 6 - - Sunday - - 1-53 - - with 4 or more days this year - * - 7 - - Monday - - 1-53 - - with a Monday in this year - -Argument type: DATE/TIMESTAMP/STRING - -Return type: INTEGER - -Synonyms: `WEEK_OF_YEAR`_ - -Example:: - - os> source=people | eval `WEEK(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK(DATE('2008-02-20'), 1)` = WEEK(DATE('2008-02-20'), 1) | fields `WEEK(DATE('2008-02-20'))`, `WEEK(DATE('2008-02-20'), 1)` - fetched rows / total rows = 1/1 - +--------------------------+-----------------------------+ - | WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) | - |--------------------------+-----------------------------| - | 7 | 8 | - +--------------------------+-----------------------------+ - - -WEEKDAY -------- - -Description ->>>>>>>>>>> - -Usage: weekday(date) returns the weekday index for date (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). - -It is similar to the `dayofweek`_ function, but returns different indexes for each day. 
- -Argument type: STRING/DATE/TIME/TIMESTAMP - -Return type: INTEGER - -Example:: - - os> source=people | eval `weekday(DATE('2020-08-26'))` = weekday(DATE('2020-08-26')) | eval `weekday(DATE('2020-08-27'))` = weekday(DATE('2020-08-27')) | fields `weekday(DATE('2020-08-26'))`, `weekday(DATE('2020-08-27'))` - fetched rows / total rows = 1/1 - +-----------------------------+-----------------------------+ - | weekday(DATE('2020-08-26')) | weekday(DATE('2020-08-27')) | - |-----------------------------+-----------------------------| - | 2 | 3 | - +-----------------------------+-----------------------------+ - - -WEEK_OF_YEAR ------------- - -Description ->>>>>>>>>>> - -Usage: week_of_year(date[, mode]) returns the week number for date. If the mode argument is omitted, the default mode 0 is used. - -.. list-table:: The following table describes how the mode argument works. - :widths: 25 50 25 75 - :header-rows: 1 - - * - Mode - - First day of week - - Range - - Week 1 is the first week ... - * - 0 - - Sunday - - 0-53 - - with a Sunday in this year - * - 1 - - Monday - - 0-53 - - with 4 or more days this year - * - 2 - - Sunday - - 1-53 - - with a Sunday in this year - * - 3 - - Monday - - 1-53 - - with 4 or more days this year - * - 4 - - Sunday - - 0-53 - - with 4 or more days this year - * - 5 - - Monday - - 0-53 - - with a Monday in this year - * - 6 - - Sunday - - 1-53 - - with 4 or more days this year - * - 7 - - Monday - - 1-53 - - with a Monday in this year - -Argument type: DATE/TIMESTAMP/STRING - -Return type: INTEGER - -Synonyms: `WEEK`_ - -Example:: - - os> source=people | eval `WEEK_OF_YEAR(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` = WEEK_OF_YEAR(DATE('2008-02-20'), 1) | fields `WEEK_OF_YEAR(DATE('2008-02-20'))`, `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` - fetched rows / total rows = 1/1 - +----------------------------------+-------------------------------------+ - | WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) | - |----------------------------------+-------------------------------------| - | 7 | 8 | - +----------------------------------+-------------------------------------+ - - -YEAR ----- - -Description ->>>>>>>>>>> - -Usage: year(date) returns the year for date, in the range 1000 to 9999, or 0 for the “zero” date. - -Argument type: STRING/DATE/TIMESTAMP - -Return type: INTEGER - -Example:: - - os> source=people | eval `YEAR(DATE('2020-08-26'))` = YEAR(DATE('2020-08-26')) | fields `YEAR(DATE('2020-08-26'))` - fetched rows / total rows = 1/1 - +--------------------------+ - | YEAR(DATE('2020-08-26')) | - |--------------------------| - | 2020 | - +--------------------------+ - - -YEARWEEK --------- - -Description ->>>>>>>>>>> - -Usage: yearweek(date[, mode]) returns the year and week for date as an integer. It accepts and optional mode arguments aligned with those available for the `WEEK`_ function. 
-
-Argument type: STRING/DATE/TIME/TIMESTAMP
-
-Return type: INTEGER
-
-Example::
-
-    os> source=people | eval `YEARWEEK('2020-08-26')` = YEARWEEK('2020-08-26') | eval `YEARWEEK('2019-01-05', 1)` = YEARWEEK('2019-01-05', 1) | fields `YEARWEEK('2020-08-26')`, `YEARWEEK('2019-01-05', 1)`
-    fetched rows / total rows = 1/1
-    +------------------------+---------------------------+
-    | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 1) |
-    |------------------------+---------------------------|
-    | 202034                 | 201901                    |
-    +------------------------+---------------------------+
-
diff --git a/docs/user/ppl/functions/expressions.md b/docs/user/ppl/functions/expressions.md
new file mode 100644
index 00000000000..999531cabbe
--- /dev/null
+++ b/docs/user/ppl/functions/expressions.md
@@ -0,0 +1,185 @@
+# Expressions
+
+## Introduction
+
+Expressions, particularly value expressions, are those which return a scalar value. Expressions have different types and forms. For example, there are literal values as atom expressions, and arithmetic, predicate, and function expressions built on top of them. Expressions can also be used in different clauses, such as an arithmetic expression in the `Filter` or `Stats` command.
+## Arithmetic Operators
+
+### Description
+
+#### Operators
+
+An arithmetic expression is formed from numeric literals and the following binary arithmetic operators:
+1. `+`: Add.
+2. `-`: Subtract.
+3. `*`: Multiply.
+4. `/`: Divide. Integer operands follow the legacy truncating result when
+   [plugins.ppl.syntax.legacy.preferred](../admin/settings.md) is `true` (default). When the
+   setting is `false` the operands are promoted to floating point, preserving
+   the fractional part. Division by zero still returns `NULL`.
+5. `%`: Modulo. This can be used with integers only, with the remainder of the division as the result.
+
+#### Precedence
+
+Parentheses can be used to control the precedence of arithmetic operators. Otherwise, operators of higher precedence are applied first.
+#### Type Conversion
+
+Implicit type conversion is performed when looking up an operator signature. For example, an integer `+` a real number matches the signature `+(double,double)`, which results in a real number. This rule also applies to the function calls discussed below.
+### Examples
+
+Here is an example of different types of arithmetic expressions
+
+```ppl
+source=accounts
+| where age > (25 + 5)
+| fields age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 3/3
++-----+
+| age |
+|-----|
+| 32  |
+| 36  |
+| 33  |
++-----+
+```
+
+## Predicate Operators
+
+### Description
+
+A predicate operator is an expression that evaluates to true or false. MISSING and NULL value comparisons follow these rules: a MISSING value is equal only to a MISSING value and is less than all other values; a NULL value is equal to a NULL value, larger than a MISSING value, but less than all other values.
+#### Operators
+
+| name | description |
+| --- | --- |
+| > | Greater than operator |
+| >= | Greater than or equal operator |
+| < | Less than operator |
+| != | Not equal operator |
+| <= | Less than or equal operator |
+| = | Equal operator |
+| == | Equal operator (alternative syntax) |
+| LIKE | Simple pattern matching |
+| IN | Value list membership test |
+| AND | AND operator |
+| OR | OR operator |
+| XOR | XOR operator |
+| NOT | Logical negation operator |
+
+It is possible to compare datetimes. When comparing different datetime types, for example `DATE` and `TIME`, both are converted to `DATETIME`.
+The following rule is applied for the conversion: a `TIME` is applied to today's date; a `DATE` is interpreted as midnight.
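+For illustration, here is a minimal sketch of such a mixed comparison (the literal values are assumptions chosen for this example, not part of the bundled test data):
+
+```ppl ignore
+source=accounts
+| where TIMESTAMP('2020-08-26 13:49:00') > DATE('2020-08-26')
+| fields age
+```
+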
+### Examples
+
+#### Basic Predicate Operator
+
+Here is an example of comparison operators
+
+```ppl
+source=accounts
+| where age > 33
+| fields age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----+
+| age |
+|-----|
+| 36  |
++-----+
+```
+
+The `==` operator can be used as an alternative to `=` for equality comparisons
+
+```ppl
+source=accounts
+| where age == 32
+| fields age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----+
+| age |
+|-----|
+| 32  |
++-----+
+```
+
+Note: Both `=` and `==` perform the same equality comparison. You can use either based on your preference.
+#### IN
+
+The IN operator tests whether a field's value is in a value list
+
+```ppl
+source=accounts
+| where age in (32, 33)
+| fields age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++-----+
+| age |
+|-----|
+| 32  |
+| 33  |
++-----+
+```
+
+#### OR
+
+OR operator
+
+```ppl
+source=accounts
+| where age = 32 OR age = 33
+| fields age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++-----+
+| age |
+|-----|
+| 32  |
+| 33  |
++-----+
+```
+
+#### NOT
+
+NOT operator
+
+```ppl
+source=accounts
+| where not age in (32, 33)
+| fields age
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++-----+
+| age |
+|-----|
+| 36  |
+| 28  |
++-----+
+```
+ 
\ No newline at end of file
diff --git a/docs/user/ppl/functions/expressions.rst b/docs/user/ppl/functions/expressions.rst
deleted file mode 100644
index 2b30c739a45..00000000000
--- a/docs/user/ppl/functions/expressions.rst
+++ /dev/null
@@ -1,177 +0,0 @@
-===========
-Expressions
-===========
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 3
-
-
-Introduction
-============
-
-Expressions, particularly value expressions, are those which return a scalar value. Expressions have different types and forms. For example, there are literal values as atom expression and arithmetic, predicate and function expression built on top of them. And also expressions can be used in different clauses, such as using arithmetic expression in ``Filter``, ``Stats`` command.
-
-Arithmetic Operators
-====================
-
-Description
------------
-
-Operators
-`````````
-
-Arithmetic expression is an expression formed by numeric literals and binary arithmetic operators as follows:
-
-1. ``+``: Add.
-2. ``-``: Subtract.
-3. ``*``: Multiply.
-4. ``/``: Divide. Integer operands follow the legacy truncating result when
-   `plugins.ppl.syntax.legacy.preferred <../admin/settings.rst>`_ is ``true`` (default). When the
-   setting is ``false`` the operands are promoted to floating point, preserving
-   the fractional part. Division by zero still returns ``NULL``.
-5. ``%``: Modulo. This can be used with integers only with remainder of the division as result.
-
-Precedence
-``````````
-
-Parentheses can be used to control the precedence of arithmetic operators. Otherwise, operators of higher precedence is performed first.
-
-Type Conversion
-```````````````
-
-Implicit type conversion is performed when looking up operator signature. For example, an integer ``+`` a real number matches signature ``+(double,double)`` which results in a real number. This rule also applies to function call discussed below.
- -Examples --------- - -Here is an example for different type of arithmetic expressions:: - - os> source=accounts | where age > (25 + 5) | fields age ; - fetched rows / total rows = 3/3 - +-----+ - | age | - |-----| - | 32 | - | 36 | - | 33 | - +-----+ - -Predicate Operators -=================== - -Description ------------ - -Predicate operator is an expression that evaluated to be ture. The MISSING and NULL value comparison has following the rule. MISSING value only equal to MISSING value and less than all the other values. NULL value equals to NULL value, large than MISSING value, but less than all the other values. - -Operators -````````` - -+----------------+----------------------------------------+ -| name | description | -+----------------+----------------------------------------+ -| > | Greater than operator | -+----------------+----------------------------------------+ -| >= | Greater than or equal operator | -+----------------+----------------------------------------+ -| < | Less than operator | -+----------------+----------------------------------------+ -| != | Not equal operator | -+----------------+----------------------------------------+ -| <= | Less than or equal operator | -+----------------+----------------------------------------+ -| = | Equal operator | -+----------------+----------------------------------------+ -| == | Equal operator (alternative syntax) | -+----------------+----------------------------------------+ -| LIKE | Simple Pattern matching | -+----------------+----------------------------------------+ -| IN | NULL value test | -+----------------+----------------------------------------+ -| AND | AND operator | -+----------------+----------------------------------------+ -| OR | OR operator | -+----------------+----------------------------------------+ -| XOR | XOR operator | -+----------------+----------------------------------------+ -| NOT | NOT NULL value test | -+----------------+----------------------------------------+ - -It is possible to compare datetimes. When comparing different datetime types, for example `DATE` and `TIME`, both converted to `DATETIME`. -The following rule is applied on coversion: a `TIME` applied to today's date; `DATE` is interpreted at midnight. - -Examples --------- - -Basic Predicate Operator -```````````````````````` - -Here is an example for comparison operators:: - - os> source=accounts | where age > 33 | fields age ; - fetched rows / total rows = 1/1 - +-----+ - | age | - |-----| - | 36 | - +-----+ - -The ``==`` operator can be used as an alternative to ``=`` for equality comparisons:: - - os> source=accounts | where age == 32 | fields age ; - fetched rows / total rows = 1/1 - +-----+ - | age | - |-----| - | 32 | - +-----+ - -Note: Both ``=`` and ``==`` perform the same equality comparison. You can use either based on your preference. 
-
-
-IN
-``
-
-IN operator test field in value lists::
-
-    os> source=accounts | where age in (32, 33) | fields age ;
-    fetched rows / total rows = 2/2
-    +-----+
-    | age |
-    |-----|
-    | 32  |
-    | 33  |
-    +-----+
-
-
-OR
-``
-
-OR operator ::
-
-    os> source=accounts | where age = 32 OR age = 33 | fields age ;
-    fetched rows / total rows = 2/2
-    +-----+
-    | age |
-    |-----|
-    | 32  |
-    | 33  |
-    +-----+
-
-
-NOT
-```
-
-NOT operator ::
-
-    os> source=accounts | where not age in (32, 33) | fields age ;
-    fetched rows / total rows = 2/2
-    +-----+
-    | age |
-    |-----|
-    | 36  |
-    | 28  |
-    +-----+
diff --git a/docs/user/ppl/functions/ip.md b/docs/user/ppl/functions/ip.md
new file mode 100644
index 00000000000..673a0a8d250
--- /dev/null
+++ b/docs/user/ppl/functions/ip.md
@@ -0,0 +1,61 @@
+# IP Address Functions
+
+## CIDRMATCH
+
+### Description
+
+Usage: `cidrmatch(ip, cidr)` checks if `ip` is within the specified `cidr` range.
+Argument type: STRING/IP, STRING
+Return type: BOOLEAN
+Example
+
+```ppl
+source=weblogs
+| where cidrmatch(host, '1.2.3.0/24')
+| fields host, url
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++---------+--------------------+
+| host    | url                |
+|---------+--------------------|
+| 1.2.3.4 | /history/voyager1/ |
+| 1.2.3.5 | /history/voyager2/ |
++---------+--------------------+
+```
+
+Note:
+ - `ip` can be an IPv4 or IPv6 address
+ - `cidr` can be an IPv4 or IPv6 block
+ - `ip` and `cidr` must both be valid and non-missing/non-null
+
+## GEOIP
+
+### Description
+
+Usage: `geoip(dataSourceName, ipAddress[, options])` looks up location information for the given IP addresses via the OpenSearch GeoSpatial plugin API.
+Argument type: STRING, STRING/IP, STRING
+Return type: OBJECT
+Example:
+
+```ppl ignore
+source=weblogs
+| eval LookupResult = geoip("dataSourceName", "50.68.18.229", "country_iso_code,city_name")
+```
+
+```text
+fetched rows / total rows = 1/1
++-------------------------------------------------------------+
+| LookupResult                                                |
+|-------------------------------------------------------------|
+| {'city_name': 'Vancouver', 'country_iso_code': 'CA'}        |
++-------------------------------------------------------------+
+```
+
+Note:
+ - `dataSourceName` must be an established dataSource on the OpenSearch GeoSpatial plugin; details of the configuration can be found at: https://opensearch.org/docs/latest/ingest-pipelines/processors/ip2geo/
+ - `ip` can be an IPv4 or an IPv6 address
+ - `options` is an optional String of comma-separated fields to output: the selection of fields is subject to the dataSourceProvider's schema. For example, the list of fields in the provided `geolite2-city` dataset includes: "country_iso_code", "country_name", "continent_name", "region_iso_code", "region_name", "city_name", "time_zone", "location"
\ No newline at end of file
diff --git a/docs/user/ppl/functions/ip.rst b/docs/user/ppl/functions/ip.rst
deleted file mode 100644
index ec853c27093..00000000000
--- a/docs/user/ppl/functions/ip.rst
+++ /dev/null
@@ -1,69 +0,0 @@
-====================
-IP Address Functions
-====================
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 1
-
-CIDRMATCH
----------
-
-Description
->>>>>>>>>>>
-
-Usage: `cidrmatch(ip, cidr)` checks if `ip` is within the specified `cidr` range.
- -Argument type: STRING/IP, STRING - -Return type: BOOLEAN - -Example:: - - > source=weblogs | where cidrmatch(host, '1.2.3.0/24') | fields host, url - fetched rows / total rows = 2/2 - +---------+--------------------+ - | host | url | - |---------|--------------------| - | 1.2.3.4 | /history/voyager1/ | - | 1.2.3.5 | /history/voyager2/ | - +---------+--------------------+ - -Note: - - `ip` can be an IPv4 or IPv6 address - - `cidr` can be an IPv4 or IPv6 block - - `ip` and `cidr` must both be valid and non-missing/non-null - - -GEOIP ---------- - -Description ->>>>>>>>>>> - -Usage: `geoip(dataSourceName, ipAddress[, options])` to lookup location information from given IP addresses via OpenSearch GeoSpatial plugin API. - -Argument type: STRING, STRING/IP, STRING - -Return type: OBJECT - -.. The execution of below example is being excluded, as this requires a standalone Geo-Spatial dataSource setup, which is not yet supported by docTest. - -Example: - - > source=weblogs | eval LookupResult = geoip("dataSourceName", "50.68.18.229", "country_iso_code,city_name") - fetched rows / total rows = 1/1 - +-------------------------------------------------------------+ - | LookupResult | - |-------------------------------------------------------------| - | {'city_name': 'Vancouver', 'country_iso_code': 'CA'} | - +-------------------------------------------------------------+ - - -Note: - - `dataSourceName` must be an established dataSource on OpenSearch GeoSpatial plugin, detail of configuration can be found: https://opensearch.org/docs/latest/ingest-pipelines/processors/ip2geo/ - - `ip` can be an IPv4 or an IPv6 address - - `options` is an optional String of comma separated fields to output: the selection of fields is subject to dataSourceProvider's schema. For example, the list of fields in the provided `geolite2-city` dataset includes: "country_iso_code", "country_name", "continent_name", "region_iso_code", "region_name", "city_name", "time_zone", "location" - diff --git a/docs/user/ppl/functions/json.md b/docs/user/ppl/functions/json.md new file mode 100644 index 00000000000..e7cec247a81 --- /dev/null +++ b/docs/user/ppl/functions/json.md @@ -0,0 +1,502 @@ +# JSON Functions

## JSON Path

### Description

All JSON paths used in JSON functions follow the format `<key1>{<index1>}.<key2>{<index2>}...`.
Each `<key>` represents a field name. The `{<index>}` part is optional and is only applicable when the corresponding key refers to an array.
For example:

```text
a{2}.b{0}
```

This refers to the element at index 0 of the `b` array, which is nested inside the element at index 2 of the `a` array.
Notes:
1. The `{<index>}` notation applies **only when** the associated key points to an array.
2. `{}` (without a specific index) is interpreted as a **wildcard**, equivalent to `{*}`, meaning "all elements" in the array at that level.
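To make the path notation concrete, here is a minimal sketch (marked `ignore`, so it is not run by the doctest suite) that applies both an explicit index and the `{}` wildcard through `json_extract`, which is documented later on this page:

```ppl ignore
source=json_test
| eval third_b = json_extract('{"a": [{"b": 0}, {"b": 1}, {"b": 2}]}', 'a{2}.b'), all_b = json_extract('{"a": [{"b": 0}, {"b": 1}, {"b": 2}]}', 'a{}.b')
| head 1
| fields third_b, all_b
```

Following the rules above, `third_b` should be `2` (the `b` value inside the element at index 2 of `a`) and `all_b` should be `[0,1,2]`.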
## JSON

### Description

Usage: `json(value)` Evaluates whether a string can be parsed as a json-encoded string. Returns the value if valid, null otherwise.
Argument type: STRING
Return type: STRING
Example

```ppl
source=json_test
| where json_valid(json_string)
| eval json=json(json_string)
| fields test_name, json_string, json
```

Expected output:

```text
fetched rows / total rows = 4/4
+--------------------+---------------------------------+---------------------------------+
| test_name | json_string | json |
|--------------------+---------------------------------+---------------------------------|
| json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {"a":"1","b":{"c":"2","d":"3"}} |
| json object | {"a":"1","b":"2"} | {"a":"1","b":"2"} |
| json array | [1, 2, 3, 4] | [1, 2, 3, 4] |
| json scalar string | "abc" | "abc" |
+--------------------+---------------------------------+---------------------------------+
```

## JSON_VALID

### Description

Version: 3.1.0
Limitation: Only works when plugins.calcite.enabled=true
Usage: `json_valid(value)` Evaluates whether a string uses valid JSON syntax. Returns TRUE if valid, FALSE if invalid. NULL input returns NULL.
Argument type: STRING
Return type: BOOLEAN
Example

```ppl
source=people
| eval is_valid_json = json_valid('[1,2,3,4]'), is_invalid_json = json_valid('{invalid}')
| fields is_valid_json, is_invalid_json
| head 1
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------+-----------------+
| is_valid_json | is_invalid_json |
|---------------+-----------------|
| True | False |
+---------------+-----------------+
```

## JSON_OBJECT

### Description

Usage: `json_object(key1, value1, key2, value2...)` creates a JSON object string from key-value pairs. Each key must be a string.
Argument type: key1: STRING, value1: ANY, key2: STRING, value2: ANY ...
Return type: STRING
Example

```ppl
source=json_test
| eval test_json = json_object('key', 123.45)
| head 1
| fields test_json
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+
| test_json |
|----------------|
| {"key":123.45} |
+----------------+
```

## JSON_ARRAY

### Description

Usage: `json_array(element1, element2, ...)` creates a JSON array string from the given elements.
Argument type: element1: ANY, element2: ANY ...
Return type: STRING
Example

```ppl
source=json_test
| eval test_json_array = json_array('key', 123.45)
| head 1
| fields test_json_array
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------+
| test_json_array |
|-----------------|
| ["key",123.45] |
+-----------------+
```

## JSON_ARRAY_LENGTH

### Description

Usage: `json_array_length(value)` parses the string as a JSON array and returns its size; null is returned for any other valid JSON value, for null input, or for invalid JSON.
Argument type: value: A JSON STRING
Return type: INTEGER
Example

```ppl
source=json_test
| eval array_length = json_array_length("[1,2,3]")
| head 1
| fields array_length
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------+
| array_length |
|--------------|
| 3 |
+--------------+
```

```ppl
source=json_test
| eval array_length = json_array_length("{\"1\": 2}")
| head 1
| fields array_length
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------+
| array_length |
|--------------|
| null |
+--------------+
```
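The size counts only top-level elements. Here is a small sketch with a nested array (marked `ignore`, so it is not run by the doctest suite), assuming the same `json_test` index used throughout this page:

```ppl ignore
source=json_test
| eval nested_length = json_array_length('[[1,2],[3],[4,5,6]]')
| head 1
| fields nested_length
```

Since the outer array has three elements, `nested_length` should be `3` regardless of how many values the inner arrays hold.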
## JSON_EXTRACT

### Description

Usage: `json_extract(json_string, path1, path2, ...)` Extracts values using the specified JSON paths. If only one path is provided, it returns a single value. If multiple paths are provided, it returns a JSON array containing the values in the order of the paths. If a path does not match a value, null is returned as the result for that path. Paths use "{<index>}" to index into an array; "{}" means "{*}".
Argument type: json_string: STRING, path1: STRING, path2: STRING ...
Return type: STRING
Example

```ppl
source=json_test
| eval extract = json_extract('{"a": [{"b": 1}, {"b": 2}]}', 'a{}.b')
| head 1
| fields extract
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+
| extract |
|---------|
| [1,2] |
+---------+
```

```ppl
source=json_test
| eval extract = json_extract('{"a": [{"b": 1}, {"b": 2}]}', 'a{}.b', 'a{}')
| head 1
| fields extract
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------------+
| extract |
|---------------------------|
| [[1,2],[{"b":1},{"b":2}]] |
+---------------------------+
```

## JSON_DELETE

### Description

Usage: `json_delete(json_string, path1, path2, ...)` Deletes values at the specified JSON paths and returns the JSON string after deletion. Paths that do not match a value are ignored.
Argument type: json_string: STRING, path1: STRING, path2: STRING ...
Return type: STRING
Example

```ppl
source=json_test
| eval delete = json_delete('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b')
| head 1
| fields delete
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+
| delete |
|--------------------|
| {"a":[{},{"b":2}]} |
+--------------------+
```

```ppl
source=json_test
| eval delete = json_delete('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 'a{1}.b')
| head 1
| fields delete
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------+
| delete |
|---------------|
| {"a":[{},{}]} |
+---------------+
```

```ppl
source=json_test
| eval delete = json_delete('{"a": [{"b": 1}, {"b": 2}]}', 'a{2}.b')
| head 1
| fields delete
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+
| delete |
|-------------------------|
| {"a":[{"b":1},{"b":2}]} |
+-------------------------+
```

## JSON_SET

### Description

Usage: `json_set(json_string, path1, value1, path2, value2...)` Sets values at the specified JSON paths. If a path's parent node is not a JSON object, the path is skipped. Returns the JSON string after setting.
Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ...
Return type: STRING
Example

```ppl
source=json_test
| eval jsonSet = json_set('{"a": [{"b": 1}]}', 'a{0}.b', 3)
| head 1
| fields jsonSet
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------+
| jsonSet |
|-----------------|
| {"a":[{"b":3}]} |
+-----------------+
```

```ppl
source=json_test
| eval jsonSet = json_set('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 3, 'a{1}.b', 4)
| head 1
| fields jsonSet
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+
| jsonSet |
|-------------------------|
| {"a":[{"b":3},{"b":4}]} |
+-------------------------+
```
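Since a path is skipped only when its parent node is not a JSON object, setting a key that does not yet exist on an object should create it. That is an assumption, not a doctest-verified behavior, so the sketch below is marked `ignore`:

```ppl ignore
source=json_test
| eval jsonSet = json_set('{"a": 1}', 'b', 2)
| head 1
| fields jsonSet
```

Under that assumption the result should be `{"a":1,"b":2}`.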
## JSON_APPEND

### Description

Usage: `json_append(json_string, path1, value1, path2, value2...)` Appends values at the specified JSON paths. If a path's target node is not an array, the path is skipped. Returns the JSON string after appending.
Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ...
Return type: STRING
Example

```ppl
source=json_test
| eval jsonAppend = json_set('{"a": [{"b": 1}]}', 'a', 3)
| head 1
| fields jsonAppend
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------+
| jsonAppend |
|------------|
| {"a":3} |
+------------+
```

```ppl
source=json_test
| eval jsonAppend = json_append('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 3, 'a{1}.b', 4)
| head 1
| fields jsonAppend
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+
| jsonAppend |
|-------------------------|
| {"a":[{"b":1},{"b":2}]} |
+-------------------------+
```

```ppl
source=json_test
| eval jsonAppend = json_append('{"a": [{"b": 1}]}', 'a', '[1,2]', 'a{1}.b', 4)
| head 1
| fields jsonAppend
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+
| jsonAppend |
|-------------------------|
| {"a":[{"b":1},"[1,2]"]} |
+-------------------------+
```

## JSON_EXTEND

### Description

Usage: `json_extend(json_string, path1, value1, path2, value2...)` Extends arrays at the specified JSON paths. If a path's target node is not an array, the path is skipped. The function tries to parse each value as an array; if the value can be parsed, its elements are appended to the target array, otherwise the value is treated as a single element. Returns the JSON string after extending.
Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ...
Return type: STRING
Example

```ppl
source=json_test
| eval jsonExtend = json_extend('{"a": [{"b": 1}]}', 'a', 3)
| head 1
| fields jsonExtend
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------+
| jsonExtend |
|-------------------|
| {"a":[{"b":1},3]} |
+-------------------+
```

```ppl
source=json_test
| eval jsonExtend = json_extend('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 3, 'a{1}.b', 4)
| head 1
| fields jsonExtend
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+
| jsonExtend |
|-------------------------|
| {"a":[{"b":1},{"b":2}]} |
+-------------------------+
```

```ppl
source=json_test
| eval jsonExtend = json_extend('{"a": [{"b": 1}]}', 'a', '[1,2]')
| head 1
| fields jsonExtend
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+
| jsonExtend |
|-------------------------|
| {"a":[{"b":1},1.0,2.0]} |
+-------------------------+
```

## JSON_KEYS

### Description

Usage: `json_keys(json_string)` Returns the key list of a JSON object as a JSON array. For any other input, returns null.
+Argument type: json_string: A JSON STRING +Return type: STRING +Example + +```ppl +source=json_test +| eval jsonKeys = json_keys('{"a": 1, "b": 2}') +| head 1 +| fields jsonKeys +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+ +| jsonKeys | +|-----------| +| ["a","b"] | ++-----------+ +``` + +```ppl +source=json_test +| eval jsonKeys = json_keys('{"a": {"c": 1}, "b": 2}') +| head 1 +| fields jsonKeys +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-----------+ +| jsonKeys | +|-----------| +| ["a","b"] | ++-----------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst deleted file mode 100644 index 61377847e04..00000000000 --- a/docs/user/ppl/functions/json.rst +++ /dev/null @@ -1,363 +0,0 @@ -==================== -JSON Functions -==================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - - - -JSON Path ---------- - -Description ->>>>>>>>>>> - -All JSON paths used in JSON functions follow the format ``{}.{}...``. - -Each ```` represents a field name. The ``{}`` part is optional and is only applicable when the corresponding key refers to an array. - -For example:: - - a{2}.b{0} - -This refers to the element at index 0 of the ``b`` array, which is nested inside the element at index 2 of the ``a`` array. - -Notes: - -1. The ``{}`` notation applies **only when** the associated key points to an array. - -2. ``{}`` (without a specific index) is interpreted as a **wildcard**, equivalent to ``{*}``, meaning "all elements" in the array at that level. - -JSON ----------- - -Description ->>>>>>>>>>> - -Usage: `json(value)` Evaluates whether a string can be parsed as a json-encoded string. Returns the value if valid, null otherwise. - -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=json_test | where json_valid(json_string) | eval json=json(json_string) | fields test_name, json_string, json - fetched rows / total rows = 4/4 - +--------------------+---------------------------------+---------------------------------+ - | test_name | json_string | json | - |--------------------+---------------------------------+---------------------------------| - | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {"a":"1","b":{"c":"2","d":"3"}} | - | json object | {"a":"1","b":"2"} | {"a":"1","b":"2"} | - | json array | [1, 2, 3, 4] | [1, 2, 3, 4] | - | json scalar string | "abc" | "abc" | - +--------------------+---------------------------------+---------------------------------+ - -JSON_VALID ----------- - -Description ->>>>>>>>>>> - -Version: 3.1.0 - -Limitation: Only works when plugins.calcite.enabled=true - -Usage: `json_valid(value)` Evaluates whether a string uses valid JSON syntax. Returns TRUE if valid, FALSE if invalid. NULL input returns NULL. - -Argument type: STRING - -Return type: BOOLEAN - -Example:: - - os> source=people | eval is_valid_json = json_valid('[1,2,3,4]'), is_invalid_json = json_valid('{invalid}') | fields is_valid_json, is_invalid_json | head 1 - fetched rows / total rows = 1/1 - +---------------+-----------------+ - | is_valid_json | is_invalid_json | - |---------------+-----------------| - | True | False | - +---------------+-----------------+ - -JSON_OBJECT ----------- - -Description ->>>>>>>>>>> - -Usage: `json_object(key1, value1, key2, value2...)` create a json object string with key value pairs. The key must be string. - -Argument type: key1: STRING, value1: ANY, key2: STRING, value2: ANY ... 
- -Return type: STRING - -Example:: - - os> source=json_test | eval test_json = json_object('key', 123.45) | head 1 | fields test_json - fetched rows / total rows = 1/1 - +----------------+ - | test_json | - |----------------| - | {"key":123.45} | - +----------------+ - -JSON_ARRAY ----------- - -Description ->>>>>>>>>>> - -Usage: `json_array(element1, element2, ...)` create a json array string with elements. - -Argument type: element1: ANY, element2: ANY ... - -Return type: STRING - -Example:: - - os> source=json_test | eval test_json_array = json_array('key', 123.45) | head 1 | fields test_json_array - fetched rows / total rows = 1/1 - +-----------------+ - | test_json_array | - |-----------------| - | ["key",123.45] | - +-----------------+ - -JSON_ARRAY_LENGTH ----------- - -Description ->>>>>>>>>>> - -Usage: `json_array_length(value)` parse the string to json array and return size,, null is returned in case of any other valid JSON string, null or an invalid JSON. - -Argument type: value: A JSON STRING - -Return type: INTEGER - -Example:: - - os> source=json_test | eval array_length = json_array_length("[1,2,3]") | head 1 | fields array_length - fetched rows / total rows = 1/1 - +--------------+ - | array_length | - |--------------| - | 3 | - +--------------+ - - os> source=json_test | eval array_length = json_array_length("{\"1\": 2}") | head 1 | fields array_length - fetched rows / total rows = 1/1 - +--------------+ - | array_length | - |--------------| - | null | - +--------------+ - -JSON_EXTRACT ----------- - -Description ->>>>>>>>>>> - -Usage: `json_extract(json_string, path1, path2, ...)` Extracts values using the specified JSON paths. If only one path is provided, it returns a single value. If multiple paths are provided, it returns a JSON Array in the order of the paths. If one path cannot find value, return null as the result for this path. The path use "{}" to represent index for array, "{}" means "{*}". - -Argument type: json_string: STRING, path1: STRING, path2: STRING ... - -Return type: STRING - -Example:: - - os> source=json_test | eval extract = json_extract('{"a": [{"b": 1}, {"b": 2}]}', 'a{}.b') | head 1 | fields extract - fetched rows / total rows = 1/1 - +---------+ - | extract | - |---------| - | [1,2] | - +---------+ - - os> source=json_test | eval extract = json_extract('{"a": [{"b": 1}, {"b": 2}]}', 'a{}.b', 'a{}') | head 1 | fields extract - fetched rows / total rows = 1/1 - +---------------------------+ - | extract | - |---------------------------| - | [[1,2],[{"b":1},{"b":2}]] | - +---------------------------+ - -JSON_DELETE ----------- - -Description ->>>>>>>>>>> - -Usage: `json_delete(json_string, path1, path2, ...)` Delete values using the specified JSON paths. Return the json string after deleting. If one path cannot find value, do nothing. - -Argument type: json_string: STRING, path1: STRING, path2: STRING ... 
- -Return type: STRING - -Example:: - - os> source=json_test | eval delete = json_delete('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b') | head 1 | fields delete - fetched rows / total rows = 1/1 - +--------------------+ - | delete | - |--------------------| - | {"a":[{},{"b":2}]} | - +--------------------+ - - os> source=json_test | eval delete = json_delete('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 'a{1}.b') | head 1 | fields delete - fetched rows / total rows = 1/1 - +---------------+ - | delete | - |---------------| - | {"a":[{},{}]} | - +---------------+ - - os> source=json_test | eval delete = json_delete('{"a": [{"b": 1}, {"b": 2}]}', 'a{2}.b') | head 1 | fields delete - fetched rows / total rows = 1/1 - +-------------------------+ - | delete | - |-------------------------| - | {"a":[{"b":1},{"b":2}]} | - +-------------------------+ - -JSON_SET ----------- - -Description ->>>>>>>>>>> - -Usage: `json_set(json_string, path1, value1, path2, value2...)` Set values to corresponding paths using the specified JSON paths. If one path's parent node is not a json object, skip the path. Return the json string after setting. - -Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ... - -Return type: STRING - -Example:: - - os> source=json_test | eval jsonSet = json_set('{"a": [{"b": 1}]}', 'a{0}.b', 3) | head 1 | fields jsonSet - fetched rows / total rows = 1/1 - +-----------------+ - | jsonSet | - |-----------------| - | {"a":[{"b":3}]} | - +-----------------+ - - os> source=json_test | eval jsonSet = json_set('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 3, 'a{1}.b', 4) | head 1 | fields jsonSet - fetched rows / total rows = 1/1 - +-------------------------+ - | jsonSet | - |-------------------------| - | {"a":[{"b":3},{"b":4}]} | - +-------------------------+ - -JSON_APPEND ----------- - -Description ->>>>>>>>>>> - -Usage: `json_append(json_string, path1, value1, path2, value2...)` Append values to corresponding paths using the specified JSON paths. If one path's target node is not an array, skip the path. Return the json string after setting. - -Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ... - -Return type: STRING - -Example:: - - os> source=json_test | eval jsonAppend = json_set('{"a": [{"b": 1}]}', 'a', 3) | head 1 | fields jsonAppend - fetched rows / total rows = 1/1 - +------------+ - | jsonAppend | - |------------| - | {"a":3} | - +------------+ - - os> source=json_test | eval jsonAppend = json_append('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 3, 'a{1}.b', 4) | head 1 | fields jsonAppend - fetched rows / total rows = 1/1 - +-------------------------+ - | jsonAppend | - |-------------------------| - | {"a":[{"b":1},{"b":2}]} | - +-------------------------+ - - os> source=json_test | eval jsonAppend = json_append('{"a": [{"b": 1}]}', 'a', '[1,2]', 'a{1}.b', 4) | head 1 | fields jsonAppend - fetched rows / total rows = 1/1 - +-------------------------+ - | jsonAppend | - |-------------------------| - | {"a":[{"b":1},"[1,2]"]} | - +-------------------------+ - -JSON_EXTEND ----------- - -Description ->>>>>>>>>>> - -Usage: `json_extend(json_string, path1, value1, path2, value2...)` Extend values to corresponding paths using the specified JSON paths. If one path's target node is not an array, skip the path. The function will try to parse the value as an array. If it can be parsed, extend it to the target array. Otherwise, regard the value a single one. Return the json string after setting. 
- -Argument type: json_string: STRING, path1: STRING, value1: ANY, path2: STRING, value2: ANY ... - -Return type: STRING - -Example:: - - os> source=json_test | eval jsonExtend = json_extend('{"a": [{"b": 1}]}', 'a', 3) | head 1 | fields jsonExtend - fetched rows / total rows = 1/1 - +-------------------+ - | jsonExtend | - |-------------------| - | {"a":[{"b":1},3]} | - +-------------------+ - - os> source=json_test | eval jsonExtend = json_extend('{"a": [{"b": 1}, {"b": 2}]}', 'a{0}.b', 3, 'a{1}.b', 4) | head 1 | fields jsonExtend - fetched rows / total rows = 1/1 - +-------------------------+ - | jsonExtend | - |-------------------------| - | {"a":[{"b":1},{"b":2}]} | - +-------------------------+ - - os> source=json_test | eval jsonExtend = json_extend('{"a": [{"b": 1}]}', 'a', '[1,2]') | head 1 | fields jsonExtend - fetched rows / total rows = 1/1 - +-------------------------+ - | jsonExtend | - |-------------------------| - | {"a":[{"b":1},1.0,2.0]} | - +-------------------------+ - -JSON_KEYS ---------- - -Description ->>>>>>>>>>> - -Usage: `json_keys(json_string)` Return the key list of the Json object as a Json array. Otherwise, return null. - -Argument type: json_string: A JSON STRING - -Return type: STRING - -Example:: - - os> source=json_test | eval jsonKeys = json_keys('{"a": 1, "b": 2}') | head 1 | fields jsonKeys - fetched rows / total rows = 1/1 - +-----------+ - | jsonKeys | - |-----------| - | ["a","b"] | - +-----------+ - - os> source=json_test | eval jsonKeys = json_keys('{"a": {"c": 1}, "b": 2}') | head 1 | fields jsonKeys - fetched rows / total rows = 1/1 - +-----------+ - | jsonKeys | - |-----------| - | ["a","b"] | - +-----------+ diff --git a/docs/user/ppl/functions/math.md b/docs/user/ppl/functions/math.md new file mode 100644 index 00000000000..6b2fe319df1 --- /dev/null +++ b/docs/user/ppl/functions/math.md @@ -0,0 +1,1187 @@ +# Mathematical Functions

## ABS

### Description

Usage: abs(x) calculates the absolute value of x.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: INTEGER/LONG/FLOAT/DOUBLE
Example

```ppl
source=people
| eval `ABS(-1)` = ABS(-1)
| fields `ABS(-1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+
| ABS(-1) |
|---------|
| 1 |
+---------+
```

## ADD

### Description

Usage: add(x, y) calculates x plus y.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: Wider number between x and y
Synonyms: Addition Symbol (+)
Example

```ppl
source=people
| eval `ADD(2, 1)` = ADD(2, 1)
| fields `ADD(2, 1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+
| ADD(2, 1) |
|-----------|
| 3 |
+-----------+
```

## SUBTRACT

### Description

Usage: subtract(x, y) calculates x minus y.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: Wider number between x and y
Synonyms: Subtraction Symbol (-)
Example

```ppl
source=people
| eval `SUBTRACT(2, 1)` = SUBTRACT(2, 1)
| fields `SUBTRACT(2, 1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+
| SUBTRACT(2, 1) |
|----------------|
| 1 |
+----------------+
```

## MULTIPLY

### Description

Usage: multiply(x, y) calculates the multiplication of x and y.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: Wider number between x and y
Synonyms: Multiplication Symbol (\*)
Example

```ppl
source=people
| eval `MULTIPLY(2, 1)` = MULTIPLY(2, 1)
| fields `MULTIPLY(2, 1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+
| MULTIPLY(2, 1) |
|----------------|
| 2 |
+----------------+
```

## DIVIDE

### Description

Usage: divide(x, y) calculates x divided by y.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: Wider number between x and y. If y equals to 0, then returns NULL.
Synonyms: Division Symbol (/)
Example

```ppl
source=people
| eval `DIVIDE(2, 1)` = DIVIDE(2, 1)
| fields `DIVIDE(2, 1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------+
| DIVIDE(2, 1) |
|--------------|
| 2 |
+--------------+
```

## SUM

### Description

Usage: sum(x, y, ...) calculates the sum of all provided arguments. This function accepts a variable number of arguments.
Note: This function is only available in the eval command context and is rewritten to arithmetic addition during query parsing.
Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE arguments
Return type: Wider number type among all arguments
Example

```ppl
source=accounts
| eval `SUM(1, 2, 3)` = SUM(1, 2, 3)
| fields `SUM(1, 2, 3)`
```

Expected output:

```text
fetched rows / total rows = 4/4
+--------------+
| SUM(1, 2, 3) |
|--------------|
| 6 |
| 6 |
| 6 |
| 6 |
+--------------+
```

```ppl
source=accounts
| eval total = SUM(age, 10, 5)
| fields age, total
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----+-------+
| age | total |
|-----+-------|
| 32 | 47 |
| 36 | 51 |
| 28 | 43 |
| 33 | 48 |
+-----+-------+
```

## AVG

### Description

Usage: avg(x, y, ...) calculates the average (arithmetic mean) of all provided arguments. This function accepts a variable number of arguments.
Note: This function is only available in the eval command context and is rewritten to an arithmetic expression (sum / count) at query parsing time.
Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE arguments
Return type: DOUBLE
Example

```ppl
source=accounts
| eval `AVG(1, 2, 3)` = AVG(1, 2, 3)
| fields `AVG(1, 2, 3)`
```

Expected output:

```text
fetched rows / total rows = 4/4
+--------------+
| AVG(1, 2, 3) |
|--------------|
| 2.0 |
| 2.0 |
| 2.0 |
| 2.0 |
+--------------+
```

```ppl
source=accounts
| eval average = AVG(age, 30)
| fields age, average
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----+---------+
| age | average |
|-----+---------|
| 32 | 31.0 |
| 36 | 33.0 |
| 28 | 29.0 |
| 33 | 31.5 |
+-----+---------+
```

## ACOS

### Description

Usage: acos(x) calculates the arc cosine of x. Returns NULL if x is not in the range -1 to 1.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `ACOS(0)` = ACOS(0)
| fields `ACOS(0)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+
| ACOS(0) |
|--------------------|
| 1.5707963267948966 |
+--------------------+
```
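Since the description states that ACOS returns NULL outside the range -1 to 1, here is a quick sketch of the out-of-range case (marked `ignore`, so it is not run by the doctest suite):

```ppl ignore
source=people
| eval `ACOS(2)` = ACOS(2)
| fields `ACOS(2)`
```

`ACOS(2)` should evaluate to null, since 2 lies outside [-1, 1].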
## ASIN

### Description

Usage: asin(x) calculates the arc sine of x. Returns NULL if x is not in the range -1 to 1.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `ASIN(0)` = ASIN(0)
| fields `ASIN(0)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+
| ASIN(0) |
|---------|
| 0.0 |
+---------+
```

## ATAN

### Description

Usage: atan(x) calculates the arc tangent of x. atan(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `ATAN(2)` = ATAN(2), `ATAN(2, 3)` = ATAN(2, 3)
| fields `ATAN(2)`, `ATAN(2, 3)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+--------------------+
| ATAN(2) | ATAN(2, 3) |
|--------------------+--------------------|
| 1.1071487177940904 | 0.5880026035475675 |
+--------------------+--------------------+
```

## ATAN2

### Description

Usage: atan2(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `ATAN2(2, 3)` = ATAN2(2, 3)
| fields `ATAN2(2, 3)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+
| ATAN2(2, 3) |
|--------------------|
| 0.5880026035475675 |
+--------------------+
```

## CEIL

An alias for [CEILING](#ceiling) function.

## CEILING

### Description

Usage: CEILING(T) takes the ceiling of value T.
Note: [CEIL](#ceil) and CEILING functions have the same implementation & functionality
Limitation: CEILING only works as expected when IEEE 754 double type displays decimal when stored.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: same type as input
Example

```ppl
source=people
| eval `CEILING(0)` = CEILING(0), `CEILING(50.00005)` = CEILING(50.00005), `CEILING(-50.00005)` = CEILING(-50.00005)
| fields `CEILING(0)`, `CEILING(50.00005)`, `CEILING(-50.00005)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------+-------------------+--------------------+
| CEILING(0) | CEILING(50.00005) | CEILING(-50.00005) |
|------------+-------------------+--------------------|
| 0 | 51.0 | -50.0 |
+------------+-------------------+--------------------+
```

```ppl
source=people
| eval `CEILING(3147483647.12345)` = CEILING(3147483647.12345), `CEILING(113147483647.12345)` = CEILING(113147483647.12345), `CEILING(3147483647.00001)` = CEILING(3147483647.00001)
| fields `CEILING(3147483647.12345)`, `CEILING(113147483647.12345)`, `CEILING(3147483647.00001)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------------+-----------------------------+---------------------------+
| CEILING(3147483647.12345) | CEILING(113147483647.12345) | CEILING(3147483647.00001) |
|---------------------------+-----------------------------+---------------------------|
| 3147483648.0 | 113147483648.0 | 3147483648.0 |
+---------------------------+-----------------------------+---------------------------+
```
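Because CEIL is just an alias, it can be substituted anywhere CEILING appears; a one-line sketch (marked `ignore`, so it is not run by the doctest suite):

```ppl ignore
source=people
| eval `CEIL(50.00005)` = CEIL(50.00005)
| fields `CEIL(50.00005)`
```

This should return 51.0, matching the `CEILING(50.00005)` example above.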
## CONV

### Description

Usage: CONV(x, a, b) converts the number x from base a to base b.
Argument type: x: STRING, a: INTEGER, b: INTEGER
Return type: STRING
Example

```ppl
source=people
| eval `CONV('12', 10, 16)` = CONV('12', 10, 16), `CONV('2C', 16, 10)` = CONV('2C', 16, 10), `CONV(12, 10, 2)` = CONV(12, 10, 2), `CONV(1111, 2, 10)` = CONV(1111, 2, 10)
| fields `CONV('12', 10, 16)`, `CONV('2C', 16, 10)`, `CONV(12, 10, 2)`, `CONV(1111, 2, 10)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+--------------------+-----------------+-------------------+
| CONV('12', 10, 16) | CONV('2C', 16, 10) | CONV(12, 10, 2) | CONV(1111, 2, 10) |
|--------------------+--------------------+-----------------+-------------------|
| c | 44 | 1100 | 15 |
+--------------------+--------------------+-----------------+-------------------+
```

## COS

### Description

Usage: cos(x) calculates the cosine of x, where x is given in radians.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `COS(0)` = COS(0)
| fields `COS(0)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------+
| COS(0) |
|--------|
| 1.0 |
+--------+
```

## COSH

### Description

Usage: cosh(x) calculates the hyperbolic cosine of x, defined as (((e^x) + (e^(-x))) / 2).
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `COSH(2)` = COSH(2)
| fields `COSH(2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+
| COSH(2) |
|--------------------|
| 3.7621956910836314 |
+--------------------+
```

## COT

### Description

Usage: cot(x) calculates the cotangent of x. Returns an out-of-range error if x equals to 0.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `COT(1)` = COT(1)
| fields `COT(1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+
| COT(1) |
|--------------------|
| 0.6420926159343306 |
+--------------------+
```

## CRC32

### Description

Usage: Calculates a cyclic redundancy check value and returns a 32-bit unsigned value.
Argument type: STRING
Return type: LONG
Example

```ppl
source=people
| eval `CRC32('MySQL')` = CRC32('MySQL')
| fields `CRC32('MySQL')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+
| CRC32('MySQL') |
|----------------|
| 3259397556 |
+----------------+
```

## DEGREES

### Description

Usage: degrees(x) converts x from radians to degrees.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `DEGREES(1.57)` = DEGREES(1.57)
| fields `DEGREES(1.57)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------+
| DEGREES(1.57) |
|-------------------|
| 89.95437383553924 |
+-------------------+
```

## E

### Description

Usage: E() returns Euler's number
Return type: DOUBLE
Example

```ppl
source=people
| eval `E()` = E()
| fields `E()`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------+
| E() |
|-------------------|
| 2.718281828459045 |
+-------------------+
```
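As a quick sanity check tying Euler's number to the natural logarithm (`LN`, documented below), a sketch marked `ignore` (not run by the doctest suite):

```ppl ignore
source=people
| eval `LN(E())` = LN(E())
| fields `LN(E())`
```

Since ln(e) = 1, `LN(E())` should return 1.0.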
## EXP

### Description

Usage: exp(x) returns e raised to the power of x.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `EXP(2)` = EXP(2)
| fields `EXP(2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------+
| EXP(2) |
|------------------|
| 7.38905609893065 |
+------------------+
```

## EXPM1

### Description

Usage: expm1(NUMBER T) returns the exponential of T, minus 1.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `EXPM1(1)` = EXPM1(1)
| fields `EXPM1(1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------+
| EXPM1(1) |
|-------------------|
| 1.718281828459045 |
+-------------------+
```

## FLOOR

### Description

Usage: FLOOR(T) takes the floor of value T.
Limitation: FLOOR only works as expected when IEEE 754 double type displays decimal when stored.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: same type as input
Example

```ppl
source=people
| eval `FLOOR(0)` = FLOOR(0), `FLOOR(50.00005)` = FLOOR(50.00005), `FLOOR(-50.00005)` = FLOOR(-50.00005)
| fields `FLOOR(0)`, `FLOOR(50.00005)`, `FLOOR(-50.00005)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------+-----------------+------------------+
| FLOOR(0) | FLOOR(50.00005) | FLOOR(-50.00005) |
|----------+-----------------+------------------|
| 0 | 50.0 | -51.0 |
+----------+-----------------+------------------+
```

```ppl
source=people
| eval `FLOOR(3147483647.12345)` = FLOOR(3147483647.12345), `FLOOR(113147483647.12345)` = FLOOR(113147483647.12345), `FLOOR(3147483647.00001)` = FLOOR(3147483647.00001)
| fields `FLOOR(3147483647.12345)`, `FLOOR(113147483647.12345)`, `FLOOR(3147483647.00001)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------+---------------------------+-------------------------+
| FLOOR(3147483647.12345) | FLOOR(113147483647.12345) | FLOOR(3147483647.00001) |
|-------------------------+---------------------------+-------------------------|
| 3147483647.0 | 113147483647.0 | 3147483647.0 |
+-------------------------+---------------------------+-------------------------+
```

```ppl
source=people
| eval `FLOOR(282474973688888.022)` = FLOOR(282474973688888.022), `FLOOR(9223372036854775807.022)` = FLOOR(9223372036854775807.022), `FLOOR(9223372036854775807.0000001)` = FLOOR(9223372036854775807.0000001)
| fields `FLOOR(282474973688888.022)`, `FLOOR(9223372036854775807.022)`, `FLOOR(9223372036854775807.0000001)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------------------+--------------------------------+------------------------------------+
| FLOOR(282474973688888.022) | FLOOR(9223372036854775807.022) | FLOOR(9223372036854775807.0000001) |
|----------------------------+--------------------------------+------------------------------------|
| 282474973688888.0 | 9.223372036854776e+18 | 9.223372036854776e+18 |
+----------------------------+--------------------------------+------------------------------------+
```

## LN

### Description

Usage: ln(x) returns the natural logarithm of x.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `LN(2)` = LN(2)
| fields `LN(2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+
| LN(2) |
|--------------------|
| 0.6931471805599453 |
+--------------------+
```

## LOG

### Description

Usage: log(x) returns the natural logarithm of x, that is, the base e logarithm of x. log(B, x) is equivalent to log(x)/log(B).
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `LOG(2)` = LOG(2), `LOG(2, 8)` = LOG(2, 8)
| fields `LOG(2)`, `LOG(2, 8)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+-----------+
| LOG(2) | LOG(2, 8) |
|--------------------+-----------|
| 0.6931471805599453 | 3.0 |
+--------------------+-----------+
```

## LOG2

### Description

Usage: log2(x) is equivalent to log(x)/log(2).
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `LOG2(8)` = LOG2(8)
| fields `LOG2(8)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+
| LOG2(8) |
|---------|
| 3.0 |
+---------+
```

## LOG10

### Description

Usage: log10(x) is equivalent to log(x)/log(10).
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `LOG10(100)` = LOG10(100)
| fields `LOG10(100)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------+
| LOG10(100) |
|------------|
| 2.0 |
+------------+
```

## MOD

### Description

Usage: MOD(n, m) calculates the remainder of the number n divided by m.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: Wider type between types of n and m if m is a nonzero value. If m equals to 0, then returns NULL.
Example

```ppl
source=people
| eval `MOD(3, 2)` = MOD(3, 2), `MOD(3.1, 2)` = MOD(3.1, 2)
| fields `MOD(3, 2)`, `MOD(3.1, 2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+-------------+
| MOD(3, 2) | MOD(3.1, 2) |
|-----------+-------------|
| 1 | 1.1 |
+-----------+-------------+
```

## MODULUS

### Description

Usage: MODULUS(n, m) calculates the remainder of the number n divided by m.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: Wider type between types of n and m if m is a nonzero value. If m equals to 0, then returns NULL.
Example

```ppl
source=people
| eval `MODULUS(3, 2)` = MODULUS(3, 2), `MODULUS(3.1, 2)` = MODULUS(3.1, 2)
| fields `MODULUS(3, 2)`, `MODULUS(3.1, 2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------+-----------------+
| MODULUS(3, 2) | MODULUS(3.1, 2) |
|---------------+-----------------|
| 1 | 1.1 |
+---------------+-----------------+
```

## PI

### Description

Usage: PI() returns the constant pi
Return type: DOUBLE
Example

```ppl
source=people
| eval `PI()` = PI()
| fields `PI()`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------+
| PI() |
|-------------------|
| 3.141592653589793 |
+-------------------+
```
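To see the constant in use with the trigonometric functions documented above, a sketch marked `ignore` (not run by the doctest suite):

```ppl ignore
source=people
| eval `COS(PI())` = COS(PI())
| fields `COS(PI())`
```

Since cos(π) = -1, `COS(PI())` should return -1.0.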
## POW

### Description

Usage: POW(x, y) calculates the value of x raised to the power of y. Bad inputs return a NULL result.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Synonyms: [POWER](#power)
Example

```ppl
source=people
| eval `POW(3, 2)` = POW(3, 2), `POW(-3, 2)` = POW(-3, 2), `POW(3, -2)` = POW(3, -2)
| fields `POW(3, 2)`, `POW(-3, 2)`, `POW(3, -2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+------------+--------------------+
| POW(3, 2) | POW(-3, 2) | POW(3, -2) |
|-----------+------------+--------------------|
| 9.0 | 9.0 | 0.1111111111111111 |
+-----------+------------+--------------------+
```

## POWER

### Description

Usage: POWER(x, y) calculates the value of x raised to the power of y. Bad inputs return a NULL result.
Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Synonyms: [POW](#pow)
Example

```ppl
source=people
| eval `POWER(3, 2)` = POWER(3, 2), `POWER(-3, 2)` = POWER(-3, 2), `POWER(3, -2)` = POWER(3, -2)
| fields `POWER(3, 2)`, `POWER(-3, 2)`, `POWER(3, -2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------+--------------+--------------------+
| POWER(3, 2) | POWER(-3, 2) | POWER(3, -2) |
|-------------+--------------+--------------------|
| 9.0 | 9.0 | 0.1111111111111111 |
+-------------+--------------+--------------------+
```

## RADIANS

### Description

Usage: radians(x) converts x from degrees to radians.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `RADIANS(90)` = RADIANS(90)
| fields `RADIANS(90)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------+
| RADIANS(90) |
|--------------------|
| 1.5707963267948966 |
+--------------------+
```

## RAND

### Description

Usage: RAND()/RAND(N) returns a random floating-point value in the range 0 <= value < 1.0. If integer N is specified, the seed is initialized prior to execution. One implication of this behavior is that with an identical argument N, rand(N) returns the same value each time, thus producing a repeatable sequence of column values.
Argument type: INTEGER
Return type: FLOAT
Example

```ppl
source=people
| eval `RAND(3)` = RAND(3)
| fields `RAND(3)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------+
| RAND(3) |
|---------------------|
| 0.34346429521113886 |
+---------------------+
```

## ROUND

### Description

Usage: ROUND(x, d) rounds the argument x to d decimal places; d defaults to 0 if not specified.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type map:
(INTEGER/LONG [,INTEGER]) -> LONG
(FLOAT/DOUBLE [,INTEGER]) -> DOUBLE
Example

```ppl
source=people
| eval `ROUND(12.34)` = ROUND(12.34), `ROUND(12.34, 1)` = ROUND(12.34, 1), `ROUND(12.34, -1)` = ROUND(12.34, -1), `ROUND(12, 1)` = ROUND(12, 1)
| fields `ROUND(12.34)`, `ROUND(12.34, 1)`, `ROUND(12.34, -1)`, `ROUND(12, 1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------+-----------------+------------------+--------------+
| ROUND(12.34) | ROUND(12.34, 1) | ROUND(12.34, -1) | ROUND(12, 1) |
|--------------+-----------------+------------------+--------------|
| 12.0 | 12.3 | 10.0 | 12 |
+--------------+-----------------+------------------+--------------+
```

## SIGN

### Description

Usage: Returns the sign of the argument as -1, 0, or 1, depending on whether the number is negative, zero, or positive.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: same type as input
Example

```ppl
source=people
| eval `SIGN(1)` = SIGN(1), `SIGN(0)` = SIGN(0), `SIGN(-1.1)` = SIGN(-1.1)
| fields `SIGN(1)`, `SIGN(0)`, `SIGN(-1.1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+---------+------------+
| SIGN(1) | SIGN(0) | SIGN(-1.1) |
|---------+---------+------------|
| 1 | 0 | -1.0 |
+---------+---------+------------+
```

## SIGNUM

### Description

Usage: Returns the sign of the argument as -1, 0, or 1, depending on whether the number is negative, zero, or positive.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: same type as input
Synonyms: [SIGN](#sign)
Example

```ppl
source=people
| eval `SIGNUM(1)` = SIGNUM(1), `SIGNUM(0)` = SIGNUM(0), `SIGNUM(-1.1)` = SIGNUM(-1.1)
| fields `SIGNUM(1)`, `SIGNUM(0)`, `SIGNUM(-1.1)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+-----------+--------------+
| SIGNUM(1) | SIGNUM(0) | SIGNUM(-1.1) |
|-----------+-----------+--------------|
| 1 | 0 | -1.0 |
+-----------+-----------+--------------+
```

## SIN

### Description

Usage: sin(x) calculates the sine of x, where x is given in radians.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `SIN(0)` = SIN(0)
| fields `SIN(0)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------+
| SIN(0) |
|--------|
| 0.0 |
+--------+
```

## SINH

### Description

Usage: sinh(x) calculates the hyperbolic sine of x, defined as (((e^x) - (e^(-x))) / 2).
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `SINH(2)` = SINH(2)
| fields `SINH(2)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------+
| SINH(2) |
|-------------------|
| 3.626860407847019 |
+-------------------+
```

## SQRT

### Description

Usage: Calculates the square root of a non-negative number.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type map:
(Non-negative) INTEGER/LONG/FLOAT/DOUBLE -> DOUBLE
(Negative) INTEGER/LONG/FLOAT/DOUBLE -> NULL
Example

```ppl
source=people
| eval `SQRT(4)` = SQRT(4), `SQRT(4.41)` = SQRT(4.41)
| fields `SQRT(4)`, `SQRT(4.41)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+------------+
| SQRT(4) | SQRT(4.41) |
|---------+------------|
| 2.0 | 2.1 |
+---------+------------+
```

## CBRT

### Description

Usage: Calculates the cube root of a number.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl ignore
source=location
| eval `CBRT(8)` = CBRT(8), `CBRT(9.261)` = CBRT(9.261), `CBRT(-27)` = CBRT(-27)
| fields `CBRT(8)`, `CBRT(9.261)`, `CBRT(-27)`
```

Expected output:

```text
fetched rows / total rows = 2/2
+---------+-------------+-----------+
| CBRT(8) | CBRT(9.261) | CBRT(-27) |
|---------+-------------+-----------|
| 2.0 | 2.1 | -3.0 |
| 2.0 | 2.1 | -3.0 |
+---------+-------------+-----------+
```

## RINT

### Description

Usage: rint(NUMBER T) returns T rounded to the closest whole integer number.
Argument type: INTEGER/LONG/FLOAT/DOUBLE
Return type: DOUBLE
Example

```ppl
source=people
| eval `RINT(1.7)` = RINT(1.7)
| fields `RINT(1.7)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+
| RINT(1.7) |
|-----------|
| 2.0 |
+-----------+
```
 \ No newline at end of file diff --git a/docs/user/ppl/functions/math.rst b/docs/user/ppl/functions/math.rst deleted file mode 100644 index 20029c2c6ec..00000000000 --- a/docs/user/ppl/functions/math.rst +++ /dev/null @@ -1,1045 +0,0 @@ -====================== -Mathematical Functions -====================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -ABS --- - -Description ->>>>>>>>>>> - -Usage: abs(x) calculates the abs x. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: INTEGER/LONG/FLOAT/DOUBLE - -Example:: - - os> source=people | eval `ABS(-1)` = ABS(-1) | fields `ABS(-1)` - fetched rows / total rows = 1/1 - +---------+ - | ABS(-1) | - |---------| - | 1 | - +---------+ - - -ADD --- - -Description ->>>>>>>>>>> - -Usage: add(x, y) calculates x plus y. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: Wider number between x and y - -Synonyms: Addition Symbol (+) - -Example:: - - os> source=people | eval `ADD(2, 1)` = ADD(2, 1) | fields `ADD(2, 1)` - fetched rows / total rows = 1/1 - +-----------+ - | ADD(2, 1) | - |-----------| - | 3 | - +-----------+ - - -SUBTRACT --------- - -Description ->>>>>>>>>>> - -Usage: subtract(x, y) calculates x minus y.
- -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: Wider number between x and y - -Synonyms: Subtraction Symbol (-) - -Example:: - - os> source=people | eval `SUBTRACT(2, 1)` = SUBTRACT(2, 1) | fields `SUBTRACT(2, 1)` - fetched rows / total rows = 1/1 - +----------------+ - | SUBTRACT(2, 1) | - |----------------| - | 1 | - +----------------+ - - -MULTIPLY --------- - -Description ->>>>>>>>>>> - -Usage: multiply(x, y) calculates the multiplication of x and y. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: Wider number between x and y. If y equals to 0, then returns NULL. - -Synonyms: Multiplication Symbol (\*) - -Example:: - - os> source=people | eval `MULTIPLY(2, 1)` = MULTIPLY(2, 1) | fields `MULTIPLY(2, 1)` - fetched rows / total rows = 1/1 - +----------------+ - | MULTIPLY(2, 1) | - |----------------| - | 2 | - +----------------+ - - -DIVIDE ------- - -Description ->>>>>>>>>>> - -Usage: divide(x, y) calculates x divided by y. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: Wider number between x and y - -Synonyms: Division Symbol (/) - -Example:: - - os> source=people | eval `DIVIDE(2, 1)` = DIVIDE(2, 1) | fields `DIVIDE(2, 1)` - fetched rows / total rows = 1/1 - +--------------+ - | DIVIDE(2, 1) | - |--------------| - | 2 | - +--------------+ - - -SUM ---- - -Description ->>>>>>>>>>> - -Usage: sum(x, y, ...) calculates the sum of all provided arguments. This function accepts a variable number of arguments. - -Note: This function is only available in the eval command context and is rewritten to arithmetic addition while query parsing. - -Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE arguments - -Return type: Wider number type among all arguments - -Example:: - - os> source=accounts | eval `SUM(1, 2, 3)` = SUM(1, 2, 3) | fields `SUM(1, 2, 3)` - fetched rows / total rows = 4/4 - +--------------+ - | SUM(1, 2, 3) | - |--------------| - | 6 | - | 6 | - | 6 | - | 6 | - +--------------+ - - os> source=accounts | eval total = SUM(age, 10, 5) | fields age, total - fetched rows / total rows = 4/4 - +-----+-------+ - | age | total | - |-----+-------| - | 32 | 47 | - | 36 | 51 | - | 28 | 43 | - | 33 | 48 | - +-----+-------+ - - -AVG ---- - -Description ->>>>>>>>>>> - -Usage: avg(x, y, ...) calculates the average (arithmetic mean) of all provided arguments. This function accepts a variable number of arguments. - -Note: This function is only available in the eval command context and is rewritten to arithmetic expression (sum / count) at query parsing time. - -Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE arguments - -Return type: DOUBLE - -Example:: - - os> source=accounts | eval `AVG(1, 2, 3)` = AVG(1, 2, 3) | fields `AVG(1, 2, 3)` - fetched rows / total rows = 4/4 - +--------------+ - | AVG(1, 2, 3) | - |--------------| - | 2.0 | - | 2.0 | - | 2.0 | - | 2.0 | - +--------------+ - - os> source=accounts | eval average = AVG(age, 30) | fields age, average - fetched rows / total rows = 4/4 - +-----+---------+ - | age | average | - |-----+---------| - | 32 | 31.0 | - | 36 | 33.0 | - | 28 | 29.0 | - | 33 | 31.5 | - +-----+---------+ - - -ACOS ----- - -Description ->>>>>>>>>>> - -Usage: acos(x) calculates the arc cosine of x. Returns NULL if x is not in the range -1 to 1. 
- -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `ACOS(0)` = ACOS(0) | fields `ACOS(0)` - fetched rows / total rows = 1/1 - +--------------------+ - | ACOS(0) | - |--------------------| - | 1.5707963267948966 | - +--------------------+ - - -ASIN ----- - -Description ->>>>>>>>>>> - -Usage: asin(x) calculate the arc sine of x. Returns NULL if x is not in the range -1 to 1. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `ASIN(0)` = ASIN(0) | fields `ASIN(0)` - fetched rows / total rows = 1/1 - +---------+ - | ASIN(0) | - |---------| - | 0.0 | - +---------+ - - -ATAN ----- - -Description ->>>>>>>>>>> - -Usage: atan(x) calculates the arc tangent of x. atan(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `ATAN(2)` = ATAN(2), `ATAN(2, 3)` = ATAN(2, 3) | fields `ATAN(2)`, `ATAN(2, 3)` - fetched rows / total rows = 1/1 - +--------------------+--------------------+ - | ATAN(2) | ATAN(2, 3) | - |--------------------+--------------------| - | 1.1071487177940904 | 0.5880026035475675 | - +--------------------+--------------------+ - - -ATAN2 ------ - -Description ->>>>>>>>>>> - -Usage: atan2(y, x) calculates the arc tangent of y / x, except that the signs of both arguments are used to determine the quadrant of the result. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `ATAN2(2, 3)` = ATAN2(2, 3) | fields `ATAN2(2, 3)` - fetched rows / total rows = 1/1 - +--------------------+ - | ATAN2(2, 3) | - |--------------------| - | 0.5880026035475675 | - +--------------------+ - - -CEIL ----- - -An alias for `CEILING`_ function. - - -CEILING -------- - -Description ->>>>>>>>>>> - -Usage: CEILING(T) takes the ceiling of value T. - -Note: `CEIL`_ and CEILING functions have the same implementation & functionality - -Limitation: CEILING only works as expected when IEEE 754 double type displays decimal when stored. 
- -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: same type with input - -Example:: - - os> source=people | eval `CEILING(0)` = CEILING(0), `CEILING(50.00005)` = CEILING(50.00005), `CEILING(-50.00005)` = CEILING(-50.00005) | fields `CEILING(0)`, `CEILING(50.00005)`, `CEILING(-50.00005)` - fetched rows / total rows = 1/1 - +------------+-------------------+--------------------+ - | CEILING(0) | CEILING(50.00005) | CEILING(-50.00005) | - |------------+-------------------+--------------------| - | 0 | 51.0 | -50.0 | - +------------+-------------------+--------------------+ - - os> source=people | eval `CEILING(3147483647.12345)` = CEILING(3147483647.12345), `CEILING(113147483647.12345)` = CEILING(113147483647.12345), `CEILING(3147483647.00001)` = CEILING(3147483647.00001) | fields `CEILING(3147483647.12345)`, `CEILING(113147483647.12345)`, `CEILING(3147483647.00001)` - fetched rows / total rows = 1/1 - +---------------------------+-----------------------------+---------------------------+ - | CEILING(3147483647.12345) | CEILING(113147483647.12345) | CEILING(3147483647.00001) | - |---------------------------+-----------------------------+---------------------------| - | 3147483648.0 | 113147483648.0 | 3147483648.0 | - +---------------------------+-----------------------------+---------------------------+ - - -CONV ----- - -Description ->>>>>>>>>>> - -Usage: CONV(x, a, b) converts the number x from a base to b base. - -Argument type: x: STRING, a: INTEGER, b: INTEGER - -Return type: STRING - -Example:: - - os> source=people | eval `CONV('12', 10, 16)` = CONV('12', 10, 16), `CONV('2C', 16, 10)` = CONV('2C', 16, 10), `CONV(12, 10, 2)` = CONV(12, 10, 2), `CONV(1111, 2, 10)` = CONV(1111, 2, 10) | fields `CONV('12', 10, 16)`, `CONV('2C', 16, 10)`, `CONV(12, 10, 2)`, `CONV(1111, 2, 10)` - fetched rows / total rows = 1/1 - +--------------------+--------------------+-----------------+-------------------+ - | CONV('12', 10, 16) | CONV('2C', 16, 10) | CONV(12, 10, 2) | CONV(1111, 2, 10) | - |--------------------+--------------------+-----------------+-------------------| - | c | 44 | 1100 | 15 | - +--------------------+--------------------+-----------------+-------------------+ - - -COS ---- - -Description ->>>>>>>>>>> - -Usage: cos(x) calculates the cosine of x, where x is given in radians. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `COS(0)` = COS(0) | fields `COS(0)` - fetched rows / total rows = 1/1 - +--------+ - | COS(0) | - |--------| - | 1.0 | - +--------+ - - -COSH ----- - -Description ->>>>>>>>>>> - -Usage: cosh(x) calculates the hyperbolic cosine of x, defined as (((e^x) + (e^(-x))) / 2). - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `COSH(2)` = COSH(2) | fields `COSH(2)` - fetched rows / total rows = 1/1 - +--------------------+ - | COSH(2) | - |--------------------| - | 3.7621956910836314 | - +--------------------+ - - -COT ---- - -Description ->>>>>>>>>>> - -Usage: cot(x) calculates the cotangent of x. Returns out-of-range error if x equals to 0. 
- -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `COT(1)` = COT(1) | fields `COT(1)` - fetched rows / total rows = 1/1 - +--------------------+ - | COT(1) | - |--------------------| - | 0.6420926159343306 | - +--------------------+ - - -CRC32 ------ - -Description ->>>>>>>>>>> - -Usage: Calculates a cyclic redundancy check value and returns a 32-bit unsigned value. - -Argument type: STRING - -Return type: LONG - -Example:: - - os> source=people | eval `CRC32('MySQL')` = CRC32('MySQL') | fields `CRC32('MySQL')` - fetched rows / total rows = 1/1 - +----------------+ - | CRC32('MySQL') | - |----------------| - | 3259397556 | - +----------------+ - - -DEGREES -------- - -Description ->>>>>>>>>>> - -Usage: degrees(x) converts x from radians to degrees. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `DEGREES(1.57)` = DEGREES(1.57) | fields `DEGREES(1.57)` - fetched rows / total rows = 1/1 - +-------------------+ - | DEGREES(1.57) | - |-------------------| - | 89.95437383553924 | - +-------------------+ - - -E -- - -Description ->>>>>>>>>>> - -Usage: E() returns the Euler's number - -Return type: DOUBLE - -Example:: - - os> source=people | eval `E()` = E() | fields `E()` - fetched rows / total rows = 1/1 - +-------------------+ - | E() | - |-------------------| - | 2.718281828459045 | - +-------------------+ - - -EXP ---- - -Description ->>>>>>>>>>> - -Usage: exp(x) return e raised to the power of x. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `EXP(2)` = EXP(2) | fields `EXP(2)` - fetched rows / total rows = 1/1 - +------------------+ - | EXP(2) | - |------------------| - | 7.38905609893065 | - +------------------+ - - -EXPM1 ------ - -Description ->>>>>>>>>>> - -Usage: expm1(NUMBER T) returns the exponential of T, minus 1. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `EXPM1(1)` = EXPM1(1) | fields `EXPM1(1)` - fetched rows / total rows = 1/1 - +-------------------+ - | EXPM1(1) | - |-------------------| - | 1.718281828459045 | - +-------------------+ - - -FLOOR ------ - -Description ->>>>>>>>>>> - -Usage: FLOOR(T) takes the floor of value T. - -Limitation: FLOOR only works as expected when IEEE 754 double type displays decimal when stored. 
- -Argument type: a: INTEGER/LONG/FLOAT/DOUBLE - -Return type: same type with input - -Example:: - - os> source=people | eval `FLOOR(0)` = FLOOR(0), `FLOOR(50.00005)` = FLOOR(50.00005), `FLOOR(-50.00005)` = FLOOR(-50.00005) | fields `FLOOR(0)`, `FLOOR(50.00005)`, `FLOOR(-50.00005)` - fetched rows / total rows = 1/1 - +----------+-----------------+------------------+ - | FLOOR(0) | FLOOR(50.00005) | FLOOR(-50.00005) | - |----------+-----------------+------------------| - | 0 | 50.0 | -51.0 | - +----------+-----------------+------------------+ - - os> source=people | eval `FLOOR(3147483647.12345)` = FLOOR(3147483647.12345), `FLOOR(113147483647.12345)` = FLOOR(113147483647.12345), `FLOOR(3147483647.00001)` = FLOOR(3147483647.00001) | fields `FLOOR(3147483647.12345)`, `FLOOR(113147483647.12345)`, `FLOOR(3147483647.00001)` - fetched rows / total rows = 1/1 - +-------------------------+---------------------------+-------------------------+ - | FLOOR(3147483647.12345) | FLOOR(113147483647.12345) | FLOOR(3147483647.00001) | - |-------------------------+---------------------------+-------------------------| - | 3147483647.0 | 113147483647.0 | 3147483647.0 | - +-------------------------+---------------------------+-------------------------+ - - os> source=people | eval `FLOOR(282474973688888.022)` = FLOOR(282474973688888.022), `FLOOR(9223372036854775807.022)` = FLOOR(9223372036854775807.022), `FLOOR(9223372036854775807.0000001)` = FLOOR(9223372036854775807.0000001) | fields `FLOOR(282474973688888.022)`, `FLOOR(9223372036854775807.022)`, `FLOOR(9223372036854775807.0000001)` - fetched rows / total rows = 1/1 - +----------------------------+--------------------------------+------------------------------------+ - | FLOOR(282474973688888.022) | FLOOR(9223372036854775807.022) | FLOOR(9223372036854775807.0000001) | - |----------------------------+--------------------------------+------------------------------------| - | 282474973688888.0 | 9.223372036854776e+18 | 9.223372036854776e+18 | - +----------------------------+--------------------------------+------------------------------------+ - - -LN --- - -Description ->>>>>>>>>>> - -Usage: ln(x) return the the natural logarithm of x. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `LN(2)` = LN(2) | fields `LN(2)` - fetched rows / total rows = 1/1 - +--------------------+ - | LN(2) | - |--------------------| - | 0.6931471805599453 | - +--------------------+ - - -LOG ---- - -Description ->>>>>>>>>>> - -Specifications: - -Usage: log(x) returns the natural logarithm of x that is the base e logarithm of the x. log(B, x) is equivalent to log(x)/log(B). - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `LOG(2)` = LOG(2), `LOG(2, 8)` = LOG(2, 8) | fields `LOG(2)`, `LOG(2, 8)` - fetched rows / total rows = 1/1 - +--------------------+-----------+ - | LOG(2) | LOG(2, 8) | - |--------------------+-----------| - | 0.6931471805599453 | 3.0 | - +--------------------+-----------+ - - -LOG2 ----- - -Description ->>>>>>>>>>> - -Specifications: - -Usage: log2(x) is equivalent to log(x)/log(2). - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `LOG2(8)` = LOG2(8) | fields `LOG2(8)` - fetched rows / total rows = 1/1 - +---------+ - | LOG2(8) | - |---------| - | 3.0 | - +---------+ - - -LOG10 ------ - -Description ->>>>>>>>>>> - -Specifications: - -Usage: log10(x) is equivalent to log(x)/log(10). 
- -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `LOG10(100)` = LOG10(100) | fields `LOG10(100)` - fetched rows / total rows = 1/1 - +------------+ - | LOG10(100) | - |------------| - | 2.0 | - +------------+ - - -MOD ---- - -Description ->>>>>>>>>>> - -Usage: MOD(n, m) calculates the remainder of the number n divided by m. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: Wider type between types of n and m if m is nonzero value. If m equals to 0, then returns NULL. - -Example:: - - os> source=people | eval `MOD(3, 2)` = MOD(3, 2), `MOD(3.1, 2)` = MOD(3.1, 2) | fields `MOD(3, 2)`, `MOD(3.1, 2)` - fetched rows / total rows = 1/1 - +-----------+-------------+ - | MOD(3, 2) | MOD(3.1, 2) | - |-----------+-------------| - | 1 | 1.1 | - +-----------+-------------+ - - -MODULUS -------- - -Description ->>>>>>>>>>> - -Usage: MODULUS(n, m) calculates the remainder of the number n divided by m. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: Wider type between types of n and m if m is nonzero value. If m equals to 0, then returns NULL. - -Example:: - - os> source=people | eval `MODULUS(3, 2)` = MODULUS(3, 2), `MODULUS(3.1, 2)` = MODULUS(3.1, 2) | fields `MODULUS(3, 2)`, `MODULUS(3.1, 2)` - fetched rows / total rows = 1/1 - +---------------+-----------------+ - | MODULUS(3, 2) | MODULUS(3.1, 2) | - |---------------+-----------------| - | 1 | 1.1 | - +---------------+-----------------+ - - -PI --- - -Description ->>>>>>>>>>> - -Usage: PI() returns the constant pi - -Return type: DOUBLE - -Example:: - - os> source=people | eval `PI()` = PI() | fields `PI()` - fetched rows / total rows = 1/1 - +-------------------+ - | PI() | - |-------------------| - | 3.141592653589793 | - +-------------------+ - - -POW ---- - -Description ->>>>>>>>>>> - -Usage: POW(x, y) calculates the value of x raised to the power of y. Bad inputs return NULL result. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Synonyms: `POWER`_ - -Example:: - - os> source=people | eval `POW(3, 2)` = POW(3, 2), `POW(-3, 2)` = POW(-3, 2), `POW(3, -2)` = POW(3, -2) | fields `POW(3, 2)`, `POW(-3, 2)`, `POW(3, -2)` - fetched rows / total rows = 1/1 - +-----------+------------+--------------------+ - | POW(3, 2) | POW(-3, 2) | POW(3, -2) | - |-----------+------------+--------------------| - | 9.0 | 9.0 | 0.1111111111111111 | - +-----------+------------+--------------------+ - - -POWER ------ - -Description ->>>>>>>>>>> - -Usage: POWER(x, y) calculates the value of x raised to the power of y. Bad inputs return NULL result. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE, INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Synonyms: `POW`_ - -Example:: - - os> source=people | eval `POWER(3, 2)` = POWER(3, 2), `POWER(-3, 2)` = POWER(-3, 2), `POWER(3, -2)` = POWER(3, -2) | fields `POWER(3, 2)`, `POWER(-3, 2)`, `POWER(3, -2)` - fetched rows / total rows = 1/1 - +-------------+--------------+--------------------+ - | POWER(3, 2) | POWER(-3, 2) | POWER(3, -2) | - |-------------+--------------+--------------------| - | 9.0 | 9.0 | 0.1111111111111111 | - +-------------+--------------+--------------------+ - - -RADIANS -------- - -Description ->>>>>>>>>>> - -Usage: radians(x) converts x from degrees to radians. 
- -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `RADIANS(90)` = RADIANS(90) | fields `RADIANS(90)` - fetched rows / total rows = 1/1 - +--------------------+ - | RADIANS(90) | - |--------------------| - | 1.5707963267948966 | - +--------------------+ - - -RAND ----- - -Description ->>>>>>>>>>> - -Usage: RAND()/RAND(N) returns a random floating-point value in the range 0 <= value < 1.0. If integer N is specified, the seed is initialized prior to execution. One implication of this behavior is with identical argument N, rand(N) returns the same value each time, and thus produces a repeatable sequence of column values. - -Argument type: INTEGER - -Return type: FLOAT - -Example:: - - os> source=people | eval `RAND(3)` = RAND(3) | fields `RAND(3)` - fetched rows / total rows = 1/1 - +---------------------+ - | RAND(3) | - |---------------------| - | 0.34346429521113886 | - +---------------------+ - - -ROUND ------ - -Description ->>>>>>>>>>> - -Usage: ROUND(x, d) rounds the argument x to d decimal places, d defaults to 0 if not specified - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type map: - -(INTEGER/LONG [,INTEGER]) -> LONG -(FLOAT/DOUBLE [,INTEGER]) -> LONG - -Example:: - - os> source=people | eval `ROUND(12.34)` = ROUND(12.34), `ROUND(12.34, 1)` = ROUND(12.34, 1), `ROUND(12.34, -1)` = ROUND(12.34, -1), `ROUND(12, 1)` = ROUND(12, 1) | fields `ROUND(12.34)`, `ROUND(12.34, 1)`, `ROUND(12.34, -1)`, `ROUND(12, 1)` - fetched rows / total rows = 1/1 - +--------------+-----------------+------------------+--------------+ - | ROUND(12.34) | ROUND(12.34, 1) | ROUND(12.34, -1) | ROUND(12, 1) | - |--------------+-----------------+------------------+--------------| - | 12.0 | 12.3 | 10.0 | 12 | - +--------------+-----------------+------------------+--------------+ - - -SIGN ----- - -Description ->>>>>>>>>>> - -Usage: Returns the sign of the argument as -1, 0, or 1, depending on whether the number is negative, zero, or positive - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: same type with input - -Example:: - - os> source=people | eval `SIGN(1)` = SIGN(1), `SIGN(0)` = SIGN(0), `SIGN(-1.1)` = SIGN(-1.1) | fields `SIGN(1)`, `SIGN(0)`, `SIGN(-1.1)` - fetched rows / total rows = 1/1 - +---------+---------+------------+ - | SIGN(1) | SIGN(0) | SIGN(-1.1) | - |---------+---------+------------| - | 1 | 0 | -1.0 | - +---------+---------+------------+ - - -SIGNUM ------- - -Description ->>>>>>>>>>> - -Usage: Returns the sign of the argument as -1, 0, or 1, depending on whether the number is negative, zero, or positive - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: INTEGER - -Synonyms: `SIGN` - -Example:: - - os> source=people | eval `SIGNUM(1)` = SIGNUM(1), `SIGNUM(0)` = SIGNUM(0), `SIGNUM(-1.1)` = SIGNUM(-1.1) | fields `SIGNUM(1)`, `SIGNUM(0)`, `SIGNUM(-1.1)` - fetched rows / total rows = 1/1 - +-----------+-----------+--------------+ - | SIGNUM(1) | SIGNUM(0) | SIGNUM(-1.1) | - |-----------+-----------+--------------| - | 1 | 0 | -1.0 | - +-----------+-----------+--------------+ - - -SIN ---- - -Description ->>>>>>>>>>> - -Usage: sin(x) calculates the sine of x, where x is given in radians. 
- -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `SIN(0)` = SIN(0) | fields `SIN(0)` - fetched rows / total rows = 1/1 - +--------+ - | SIN(0) | - |--------| - | 0.0 | - +--------+ - - -SINH ----- - -Description ->>>>>>>>>>> - -Usage: sinh(x) calculates the hyperbolic sine of x, defined as (((e^x) - (e^(-x))) / 2). - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `SINH(2)` = SINH(2) | fields `SINH(2)` - fetched rows / total rows = 1/1 - +-------------------+ - | SINH(2) | - |-------------------| - | 3.626860407847019 | - +-------------------+ - - -SQRT ----- - -Description ->>>>>>>>>>> - -Usage: Calculates the square root of a non-negative number - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type map: - -(Non-negative) INTEGER/LONG/FLOAT/DOUBLE -> DOUBLE -(Negative) INTEGER/LONG/FLOAT/DOUBLE -> NULL - -Example:: - - os> source=people | eval `SQRT(4)` = SQRT(4), `SQRT(4.41)` = SQRT(4.41) | fields `SQRT(4)`, `SQRT(4.41)` - fetched rows / total rows = 1/1 - +---------+------------+ - | SQRT(4) | SQRT(4.41) | - |---------+------------| - | 2.0 | 2.1 | - +---------+------------+ - - -CBRT ----- - -Description ->>>>>>>>>>> - -Usage: Calculates the cube root of a number - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type DOUBLE: - -INTEGER/LONG/FLOAT/DOUBLE -> DOUBLE - -Example:: - - opensearchsql> source=location | eval `CBRT(8)` = CBRT(8), `CBRT(9.261)` = CBRT(9.261), `CBRT(-27)` = CBRT(-27) | fields `CBRT(8)`, `CBRT(9.261)`, `CBRT(-27)`; - fetched rows / total rows = 2/2 - +---------+-------------+-----------+ - | CBRT(8) | CBRT(9.261) | CBRT(-27) | - |---------+-------------+-----------| - | 2.0 | 2.1 | -3.0 | - | 2.0 | 2.1 | -3.0 | - +---------+-------------+-----------+ - - -RINT ----- - -Description ->>>>>>>>>>> - -Usage: rint(NUMBER T) returns T rounded to the closest whole integer number. - -Argument type: INTEGER/LONG/FLOAT/DOUBLE - -Return type: DOUBLE - -Example:: - - os> source=people | eval `RINT(1.7)` = RINT(1.7) | fields `RINT(1.7)` - fetched rows / total rows = 1/1 - +-----------+ - | RINT(1.7) | - |-----------| - | 2.0 | - +-----------+ \ No newline at end of file diff --git a/docs/user/ppl/functions/relevance.md b/docs/user/ppl/functions/relevance.md new file mode 100644 index 00000000000..a40a3cd7644 --- /dev/null +++ b/docs/user/ppl/functions/relevance.md @@ -0,0 +1,505 @@ +# Relevance Functions + +The relevance based functions enable users to search the index for documents by the relevance of the input query. The functions are built on the top of the search queries of the OpenSearch engine, but in memory execution within the plugin is not supported. These functions are able to perform the global filter of a query, for example the condition expression in a `WHERE` clause or in a `HAVING` clause. For more details of the relevance based search, check out the design here: [Relevance Based Search With SQL/PPL Query Engine](https://github.com/opensearch-project/sql/issues/182) +## MATCH + +### Description + +`match(field_expression, query_expression[, option=]*)` +The match function maps to the match query used in search engine, to return the documents that match a provided text, number, date or boolean value with a given field. 
Available parameters include:
+- analyzer
+- auto_generate_synonyms_phrase
+- fuzziness
+- max_expansions
+- prefix_length
+- fuzzy_transpositions
+- fuzzy_rewrite
+- lenient
+- operator
+- minimum_should_match
+- zero_terms_query
+- boost
+
+Example with only the `field` and `query` expressions, with all other parameters set to their default values
+
+```ppl
+source=accounts
+| where match(address, 'Street')
+| fields lastname, address
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----------+--------------------+
+| lastname | address            |
+|----------+--------------------|
+| Bond     | 671 Bristol Street |
+| Bates    | 789 Madison Street |
++----------+--------------------+
+```
+
+Another example to show how to set custom values for the optional parameters
+
+```ppl
+source=accounts
+| where match(firstname, 'Hattie', operator='AND', boost=2.0)
+| fields lastname
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----------+
+| lastname |
+|----------|
+| Bond     |
++----------+
+```
+
+## MATCH_PHRASE
+
+### Description
+
+`match_phrase(field_expression, query_expression[, option=]*)`
+The match_phrase function maps to the match_phrase query used in the search engine, to return the documents that match a provided text with a given field. Available parameters include:
+- analyzer
+- slop
+- zero_terms_query
+
+For backward compatibility, matchphrase is also supported and maps to the match_phrase query as well.
+Example with only the `field` and `query` expressions, with all other parameters set to their default values
+
+```ppl
+source=books
+| where match_phrase(author, 'Alexander Milne')
+| fields author, title
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----------------------+--------------------------+
+| author               | title                    |
+|----------------------+--------------------------|
+| Alan Alexander Milne | The House at Pooh Corner |
+| Alan Alexander Milne | Winnie-the-Pooh          |
++----------------------+--------------------------+
+```
+
+Another example to show how to set custom values for the optional parameters
+
+```ppl
+source=books
+| where match_phrase(author, 'Alan Milne', slop = 2)
+| fields author, title
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----------------------+--------------------------+
+| author               | title                    |
+|----------------------+--------------------------|
+| Alan Alexander Milne | The House at Pooh Corner |
+| Alan Alexander Milne | Winnie-the-Pooh          |
++----------------------+--------------------------+
+```
+
+## MATCH_PHRASE_PREFIX
+
+### Description
+
+`match_phrase_prefix(field_expression, query_expression[, option=]*)`
+The match_phrase_prefix function maps to the match_phrase_prefix query used in the search engine, to return the documents that match a provided text with a given field.
Available parameters include:
+- analyzer
+- slop
+- max_expansions
+- boost
+- zero_terms_query
+
+Example with only the `field` and `query` expressions, with all other parameters set to their default values
+
+```ppl
+source=books
+| where match_phrase_prefix(author, 'Alexander Mil')
+| fields author, title
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----------------------+--------------------------+
+| author               | title                    |
+|----------------------+--------------------------|
+| Alan Alexander Milne | The House at Pooh Corner |
+| Alan Alexander Milne | Winnie-the-Pooh          |
++----------------------+--------------------------+
+```
+
+Another example to show how to set custom values for the optional parameters
+
+```ppl
+source=books
+| where match_phrase_prefix(author, 'Alan Mil', slop = 2)
+| fields author, title
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----------------------+--------------------------+
+| author               | title                    |
+|----------------------+--------------------------|
+| Alan Alexander Milne | The House at Pooh Corner |
+| Alan Alexander Milne | Winnie-the-Pooh          |
++----------------------+--------------------------+
+```
+
+## MULTI_MATCH
+
+### Description
+
+`multi_match([field_expression+], query_expression[, option=]*)`
+`multi_match(query_expression[, option=]*)`
+The multi_match function maps to the multi_match query used in the search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields.
+**Two syntax forms are supported:**
+1. **With explicit fields** (classic syntax): `multi_match([field_list], query, ...)`
+2. **Without fields** (search default fields): `multi_match(query, ...)`
+
+When fields are omitted, the query searches in the fields specified by the `index.query.default_field` setting.
+The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows the fields to be specified in double quotes, single quotes, backticks, or even without any wrapping. Searching all fields with the star `"*"` is also available (the star symbol should be wrapped). The weight is optional and follows the field name; it can be delimited by the caret character or by whitespace.
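+
+For instance, a sketch of a complete query that weighs `title` matches twice as heavily as `author` matches (a hypothetical example, not from the original test suite; it reuses the `books` index from this section, and the weight values are illustrative):
+
+```ppl
+source=books
+| where multi_match(['title' ^ 2, 'author'], 'Pooh')
+| fields id, title, author
+```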
Please refer to the syntax samples below:
+``multi_match(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)``
+`multi_match(["*"], ...)`
+`multi_match("search text", ...)` (searches default fields)
+Available parameters include:
+- analyzer
+- auto_generate_synonyms_phrase
+- cutoff_frequency
+- fuzziness
+- fuzzy_transpositions
+- lenient
+- max_expansions
+- minimum_should_match
+- operator
+- prefix_length
+- tie_breaker
+- type
+- slop
+- boost
+
+Example with only the `fields` and `query` expressions, with all other parameters set to their default values
+
+```ppl
+source=books
+| where multi_match(['title'], 'Pooh House')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
+| 2  | Winnie-the-Pooh          | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+Another example to show how to set custom values for the optional parameters
+
+```ppl
+source=books
+| where multi_match(['title'], 'Pooh House', operator='AND', analyzer=default)
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+Example using the new syntax without specifying fields (searches in index.query.default_field)
+
+```ppl
+source=books
+| where multi_match('Pooh House')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
+| 2  | Winnie-the-Pooh          | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+## SIMPLE_QUERY_STRING
+
+### Description
+
+`simple_query_string([field_expression+], query_expression[, option=]*)`
+`simple_query_string(query_expression[, option=]*)`
+The simple_query_string function maps to the simple_query_string query used in the search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields.
+**Two syntax forms are supported:**
+1. **With explicit fields** (classic syntax): `simple_query_string([field_list], query, ...)`
+2. **Without fields** (search default fields): `simple_query_string(query, ...)`
+
+When fields are omitted, the query searches in the fields specified by the `index.query.default_field` setting.
+The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows the fields to be specified in double quotes, single quotes, backticks, or even without any wrapping. Searching all fields with the star `"*"` is also available (the star symbol should be wrapped). The weight is optional and follows the field name; it can be delimited by the caret character or by whitespace.
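+
+As a sketch of the all-fields variant described above (a hypothetical example, not from the original test suite; it reuses the `books` index from this section):
+
+```ppl
+source=books
+| where simple_query_string(["*"], 'Pooh House')
+| fields id, title, author
+```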
Please refer to the syntax samples below:
+``simple_query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)``
+`simple_query_string(["*"], ...)`
+`simple_query_string("search text", ...)` (searches default fields)
+Available parameters include:
+- analyze_wildcard
+- analyzer
+- auto_generate_synonyms_phrase
+- flags
+- fuzziness
+- fuzzy_max_expansions
+- fuzzy_prefix_length
+- fuzzy_transpositions
+- lenient
+- default_operator
+- minimum_should_match
+- quote_field_suffix
+- boost
+
+Example with only the `fields` and `query` expressions, with all other parameters set to their default values
+
+```ppl
+source=books
+| where simple_query_string(['title'], 'Pooh House')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
+| 2  | Winnie-the-Pooh          | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+Another example to show how to set custom values for the optional parameters
+
+```ppl
+source=books
+| where simple_query_string(['title'], 'Pooh House', flags='ALL', default_operator='AND')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+Example using the new syntax without specifying fields (searches in index.query.default_field)
+
+```ppl
+source=books
+| where simple_query_string('Pooh House')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
+| 2  | Winnie-the-Pooh          | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+## MATCH_BOOL_PREFIX
+
+### Description
+
+`match_bool_prefix(field_expression, query_expression)`
+The match_bool_prefix function maps to the match_bool_prefix query in the search engine. match_bool_prefix creates a match query from all but the last term in the query string. The last term is used to create a prefix query.
+
+Available parameters include:
+- analyzer
+- fuzziness
+- max_expansions
+- prefix_length
+- fuzzy_transpositions
+- operator
+- fuzzy_rewrite
+- minimum_should_match
+- boost
+
+Example with only the `field` and `query` expressions, with all other parameters set to their default values
+
+```ppl
+source=accounts
+| where match_bool_prefix(address, 'Bristol Stre')
+| fields firstname, address
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++-----------+--------------------+
+| firstname | address            |
+|-----------+--------------------|
+| Hattie    | 671 Bristol Street |
+| Nanette   | 789 Madison Street |
++-----------+--------------------+
+```
+
+Another example to show how to set custom values for the optional parameters
+
+```ppl
+source=accounts
+| where match_bool_prefix(address, 'Bristol Stre', minimum_should_match = 2)
+| fields firstname, address
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++-----------+--------------------+
+| firstname | address            |
+|-----------+--------------------|
+| Hattie    | 671 Bristol Street |
++-----------+--------------------+
+```
+
+## QUERY_STRING
+
+### Description
+
+`query_string([field_expression+], query_expression[, option=]*)`
+`query_string(query_expression[, option=]*)`
+The query_string function maps to the query_string query used in the search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields.
+**Two syntax forms are supported:**
+1. **With explicit fields** (classic syntax): `query_string([field_list], query, ...)`
+2. **Without fields** (search default fields): `query_string(query, ...)`
+
+When fields are omitted, the query searches in the fields specified by the `index.query.default_field` setting.
+The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows the fields to be specified in double quotes, single quotes, backticks, or even without any wrapping. Searching all fields with the star `"*"` is also available (the star symbol should be wrapped). The weight is optional and follows the field name; it can be delimited by the caret character or by whitespace.
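+
+Because query_string passes the query text through to the engine's full query string syntax, boolean operators may appear inside the query itself. A sketch (a hypothetical example, not from the original test suite; it reuses the `books` index from this section):
+
+```ppl
+source=books
+| where query_string(['title'], 'Pooh AND Corner')
+| fields id, title, author
+```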
Please refer to the syntax samples below:
+``query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)``
+`query_string(["*"], ...)`
+`query_string("search text", ...)` (searches default fields)
+Available parameters include:
+- analyzer
+- escape
+- allow_leading_wildcard
+- analyze_wildcard
+- auto_generate_synonyms_phrase_query
+- boost
+- default_operator
+- enable_position_increments
+- fuzziness
+- fuzzy_max_expansions
+- fuzzy_prefix_length
+- fuzzy_transpositions
+- fuzzy_rewrite
+- tie_breaker
+- lenient
+- type
+- max_determinized_states
+- minimum_should_match
+- quote_analyzer
+- phrase_slop
+- quote_field_suffix
+- rewrite
+- time_zone
+
+Example with only the `fields` and `query` expressions, with all other parameters set to their default values
+
+```ppl
+source=books
+| where query_string(['title'], 'Pooh House')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
+| 2  | Winnie-the-Pooh          | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+Another example to show how to set custom values for the optional parameters
+
+```ppl
+source=books
+| where query_string(['title'], 'Pooh House', default_operator='AND')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 1/1
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+Example using the new syntax without specifying fields (searches in index.query.default_field)
+
+```ppl
+source=books
+| where query_string('Pooh House')
+| fields id, title, author
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 2/2
++----+--------------------------+----------------------+
+| id | title                    | author               |
+|----+--------------------------+----------------------|
+| 1  | The House at Pooh Corner | Alan Alexander Milne |
+| 2  | Winnie-the-Pooh          | Alan Alexander Milne |
++----+--------------------------+----------------------+
+```
+
+### Limitations
+
+The relevance functions can currently execute only in OpenSearch DSL, not in memory, so a relevance search might fail for queries that are too complex to translate into DSL when the relevance function follows a complex PPL query. To keep your queries workable, it is recommended to place the relevance commands as close to the search command as possible, so that the relevance functions are eligible for pushdown. For example, a complex query like `search source = people | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | where match(employer, 'Open Search') | stats count() by city` could fail because it is difficult to translate to DSL, but it is better rewritten as the equivalent query `search source = people | where match(employer, 'Open Search') | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | stats count() by city`. Moving the where command with the relevance function right after the search command lets it be optimized and executed smoothly in OpenSearch DSL; both forms are laid out below.
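+
+The original form that is hard to translate to DSL:
+
+```ppl
+search source = people
+| rename firstname as name
+| dedup account_number
+| fields name, account_number, balance, employer
+| where match(employer, 'Open Search')
+| stats count() by city
+```
+
+The equivalent, pushdown-friendly rewrite:
+
+```ppl
+search source = people
+| where match(employer, 'Open Search')
+| rename firstname as name
+| dedup account_number
+| fields name, account_number, balance, employer
+| stats count() by city
+```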
See [Optimization](../../optimization/optimization.rst) to get more details about the query engine optimization. \ No newline at end of file diff --git a/docs/user/ppl/functions/relevance.rst b/docs/user/ppl/functions/relevance.rst deleted file mode 100644 index 3f30586c730..00000000000 --- a/docs/user/ppl/functions/relevance.rst +++ /dev/null @@ -1,424 +0,0 @@ -=================== -Relevance Functions -=================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -The relevance based functions enable users to search the index for documents by the relevance of the input query. The functions are built on the top of the search queries of the OpenSearch engine, but in memory execution within the plugin is not supported. These functions are able to perform the global filter of a query, for example the condition expression in a ``WHERE`` clause or in a ``HAVING`` clause. For more details of the relevance based search, check out the design here: `Relevance Based Search With SQL/PPL Query Engine `_ - -MATCH ------ - -Description ->>>>>>>>>>> - -``match(field_expression, query_expression[, option=]*)`` - -The match function maps to the match query used in search engine, to return the documents that match a provided text, number, date or boolean value with a given field. Available parameters include: - -- analyzer -- auto_generate_synonyms_phrase -- fuzziness -- max_expansions -- prefix_length -- fuzzy_transpositions -- fuzzy_rewrite -- lenient -- operator -- minimum_should_match -- zero_terms_query -- boost - -Example with only ``field`` and ``query`` expressions, and all other parameters are set default values:: - - os> source=accounts | where match(address, 'Street') | fields lastname, address; - fetched rows / total rows = 2/2 - +----------+--------------------+ - | lastname | address | - |----------+--------------------| - | Bond | 671 Bristol Street | - | Bates | 789 Madison Street | - +----------+--------------------+ - - - -Another example to show how to set custom values for the optional parameters:: - - os> source=accounts | where match(firstname, 'Hattie', operator='AND', boost=2.0) | fields lastname; - fetched rows / total rows = 1/1 - +----------+ - | lastname | - |----------| - | Bond | - +----------+ - - -MATCH_PHRASE ------------- - -Description ->>>>>>>>>>> - -``match_phrase(field_expression, query_expression[, option=]*)`` - -The match_phrase function maps to the match_phrase query used in search engine, to return the documents that match a provided text with a given field. Available parameters include: - -- analyzer -- slop -- zero_terms_query - -For backward compatibility, matchphrase is also supported and mapped to match_phrase query as well. 
- -Example with only ``field`` and ``query`` expressions, and all other parameters are set default values:: - - os> source=books | where match_phrase(author, 'Alexander Milne') | fields author, title - fetched rows / total rows = 2/2 - +----------------------+--------------------------+ - | author | title | - |----------------------+--------------------------| - | Alan Alexander Milne | The House at Pooh Corner | - | Alan Alexander Milne | Winnie-the-Pooh | - +----------------------+--------------------------+ - - - -Another example to show how to set custom values for the optional parameters:: - - os> source=books | where match_phrase(author, 'Alan Milne', slop = 2) | fields author, title - fetched rows / total rows = 2/2 - +----------------------+--------------------------+ - | author | title | - |----------------------+--------------------------| - | Alan Alexander Milne | The House at Pooh Corner | - | Alan Alexander Milne | Winnie-the-Pooh | - +----------------------+--------------------------+ - - -MATCH_PHRASE_PREFIX -------------------- - -Description ->>>>>>>>>>> - -``match_phrase_prefix(field_expression, query_expression[, option=]*)`` - -The match_phrase_prefix function maps to the match_phrase_prefix query used in search engine, to return the documents that match a provided text with a given field. Available parameters include: - -- analyzer -- slop -- max_expansions -- boost -- zero_terms_query - -Example with only ``field`` and ``query`` expressions, and all other parameters are set default values:: - - os> source=books | where match_phrase_prefix(author, 'Alexander Mil') | fields author, title - fetched rows / total rows = 2/2 - +----------------------+--------------------------+ - | author | title | - |----------------------+--------------------------| - | Alan Alexander Milne | The House at Pooh Corner | - | Alan Alexander Milne | Winnie-the-Pooh | - +----------------------+--------------------------+ - - - -Another example to show how to set custom values for the optional parameters:: - - os> source=books | where match_phrase_prefix(author, 'Alan Mil', slop = 2) | fields author, title - fetched rows / total rows = 2/2 - +----------------------+--------------------------+ - | author | title | - |----------------------+--------------------------| - | Alan Alexander Milne | The House at Pooh Corner | - | Alan Alexander Milne | Winnie-the-Pooh | - +----------------------+--------------------------+ - - -MULTI_MATCH ------------ - -Description ->>>>>>>>>>> - -``multi_match([field_expression+], query_expression[, option=]*)`` - -``multi_match(query_expression[, option=]*)`` - -The multi_match function maps to the multi_match query used in search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields. - -**Two syntax forms are supported:** - -1. **With explicit fields** (classic syntax): ``multi_match([field_list], query, ...)`` -2. **Without fields** (search default fields): ``multi_match(query, ...)`` - -When fields are omitted, the query searches in the fields specified by the ``index.query.default_field`` setting. - -The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows to specify the fields in double quotes, single quotes, in backtick or even without any wrap. All fields search using star ``"*"`` is also available (star symbol should be wrapped). 
The weight is optional and should be specified using after the field name, it could be delimeted by the `caret` character or by whitespace. Please, refer to examples below: - -| ``multi_match(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)`` -| ``multi_match(["*"], ...)`` -| ``multi_match("search text", ...)`` (searches default fields) - - -Available parameters include: - -- analyzer -- auto_generate_synonyms_phrase -- cutoff_frequency -- fuzziness -- fuzzy_transpositions -- lenient -- max_expansions -- minimum_should_match -- operator -- prefix_length -- tie_breaker -- type -- slop -- boost - -Example with only ``fields`` and ``query`` expressions, and all other parameters are set default values:: - - os> source=books | where multi_match(['title'], 'Pooh House') | fields id, title, author; - fetched rows / total rows = 2/2 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +----+--------------------------+----------------------+ - -Another example to show how to set custom values for the optional parameters:: - - os> source=books | where multi_match(['title'], 'Pooh House', operator='AND', analyzer=default) | fields id, title, author; - fetched rows / total rows = 1/1 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +----+--------------------------+----------------------+ - -Example using the new syntax without specifying fields (searches in index.query.default_field):: - - os> source=books | where multi_match('Pooh House') | fields id, title, author; - fetched rows / total rows = 2/2 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +----+--------------------------+----------------------+ - - -SIMPLE_QUERY_STRING -------------------- - -Description ->>>>>>>>>>> - -``simple_query_string([field_expression+], query_expression[, option=]*)`` - -``simple_query_string(query_expression[, option=]*)`` - -The simple_query_string function maps to the simple_query_string query used in search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields. - -**Two syntax forms are supported:** - -1. **With explicit fields** (classic syntax): ``simple_query_string([field_list], query, ...)`` -2. **Without fields** (search default fields): ``simple_query_string(query, ...)`` - -When fields are omitted, the query searches in the fields specified by the ``index.query.default_field`` setting. - -The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows to specify the fields in double quotes, single quotes, in backtick or even without any wrap. All fields search using star ``"*"`` is also available (star symbol should be wrapped). The weight is optional and should be specified using after the field name, it could be delimeted by the `caret` character or by whitespace. 
Please, refer to examples below: - -| ``simple_query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)`` -| ``simple_query_string(["*"], ...)`` -| ``simple_query_string("search text", ...)`` (searches default fields) - - -Available parameters include: - -- analyze_wildcard -- analyzer -- auto_generate_synonyms_phrase -- flags -- fuzziness -- fuzzy_max_expansions -- fuzzy_prefix_length -- fuzzy_transpositions -- lenient -- default_operator -- minimum_should_match -- quote_field_suffix -- boost - -Example with only ``fields`` and ``query`` expressions, and all other parameters are set default values:: - - os> source=books | where simple_query_string(['title'], 'Pooh House') | fields id, title, author; - fetched rows / total rows = 2/2 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +----+--------------------------+----------------------+ - -Another example to show how to set custom values for the optional parameters:: - - os> source=books | where simple_query_string(['title'], 'Pooh House', flags='ALL', default_operator='AND') | fields id, title, author; - fetched rows / total rows = 1/1 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +----+--------------------------+----------------------+ - -Example using the new syntax without specifying fields (searches in index.query.default_field):: - - os> source=books | where simple_query_string('Pooh House') | fields id, title, author; - fetched rows / total rows = 2/2 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +----+--------------------------+----------------------+ - - -MATCH_BOOL_PREFIX ------------------ - -Description ->>>>>>>>>>> - -``match_bool_prefix(field_expression, query_expression)`` - -The match_bool_prefix function maps to the match_bool_prefix query in the search engine. match_bool_prefix creates a match query from all but the last term in the query string. The last term is used to create a prefix query. 
- -- analyzer -- fuzziness -- max_expansions -- prefix_length -- fuzzy_transpositions -- operator -- fuzzy_rewrite -- minimum_should_match -- boost - -Example with only ``field`` and ``query`` expressions, and all other parameters are set default values:: - - os> source=accounts | where match_bool_prefix(address, 'Bristol Stre') | fields firstname, address - fetched rows / total rows = 2/2 - +-----------+--------------------+ - | firstname | address | - |-----------+--------------------| - | Hattie | 671 Bristol Street | - | Nanette | 789 Madison Street | - +-----------+--------------------+ - -Another example to show how to set custom values for the optional parameters:: - - os> source=accounts | where match_bool_prefix(address, 'Bristol Stre', minimum_should_match = 2) | fields firstname, address - fetched rows / total rows = 1/1 - +-----------+--------------------+ - | firstname | address | - |-----------+--------------------| - | Hattie | 671 Bristol Street | - +-----------+--------------------+ - - -QUERY_STRING ------------- - -Description ->>>>>>>>>>> - -``query_string([field_expression+], query_expression[, option=]*)`` - -``query_string(query_expression[, option=]*)`` - -The query_string function maps to the query_string query used in search engine, to return the documents that match a provided text, number, date or boolean value with a given field or fields. - -**Two syntax forms are supported:** - -1. **With explicit fields** (classic syntax): ``query_string([field_list], query, ...)`` -2. **Without fields** (search default fields): ``query_string(query, ...)`` - -When fields are omitted, the query searches in the fields specified by the ``index.query.default_field`` setting. - -The **^** lets you *boost* certain fields. Boosts are multipliers that weigh matches in one field more heavily than matches in other fields. The syntax allows to specify the fields in double quotes, -single quotes, in backtick or even without any wrap. All fields search using star ``"*"`` is also available (star symbol should be wrapped). The weight is optional and should be specified using after the field name, -it could be delimeted by the `caret` character or by whitespace. 
Please, refer to examples below: - -| ``query_string(["Tags" ^ 2, 'Title' 3.4, `Body`, Comments ^ 0.3], ...)`` -| ``query_string(["*"], ...)`` -| ``query_string("search text", ...)`` (searches default fields) - - -Available parameters include: - -- analyzer -- escape -- allow_leading_wildcard -- analyze_wildcard -- auto_generate_synonyms_phrase_query -- boost -- default_operator -- enable_position_increments -- fuzziness -- fuzzy_max_expansions -- fuzzy_prefix_length -- fuzzy_transpositions -- fuzzy_rewrite -- tie_breaker -- lenient -- type -- max_determinized_states -- minimum_should_match -- quote_analyzer -- phrase_slop -- quote_field_suffix -- rewrite -- time_zone - -Example with only ``fields`` and ``query`` expressions, and all other parameters are set default values:: - - os> source=books | where query_string(['title'], 'Pooh House') | fields id, title, author; - fetched rows / total rows = 2/2 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +----+--------------------------+----------------------+ - -Another example to show how to set custom values for the optional parameters:: - - os> source=books | where query_string(['title'], 'Pooh House', default_operator='AND') | fields id, title, author; - fetched rows / total rows = 1/1 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +----+--------------------------+----------------------+ - -Example using the new syntax without specifying fields (searches in index.query.default_field):: - - os> source=books | where query_string('Pooh House') | fields id, title, author; - fetched rows / total rows = 2/2 - +----+--------------------------+----------------------+ - | id | title | author | - |----+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +----+--------------------------+----------------------+ - -Limitations ->>>>>>>>>>> - -The relevance functions are available to execute only in OpenSearch DSL but not in memory as of now, so the relevance search might fail for queries that are too complex to translate into DSL if the relevance function is following after a complex PPL query. To make your queries always work-able, it is recommended to place the relevance commands as close to the search command as possible, to ensure the relevance functions are eligible to push down. For example, a complex query like ``search source = people | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | where match(employer, 'Open Search') | stats count() by city`` could fail because it is difficult to translate to DSL, but it would be better if we rewrite it to an equivalent query as ``search source = people | where match(employer, 'Open Search') | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | stats count() by city`` by moving the where command with relevance function to the second command right after the search command, and the relevance would be optimized and executed smoothly in OpenSearch DSL. 
See `Optimization <../../optimization/optimization.rst>`_ to get more details about the query engine optimization. diff --git a/docs/user/ppl/functions/statistical.md b/docs/user/ppl/functions/statistical.md new file mode 100644 index 00000000000..b1098566916 --- /dev/null +++ b/docs/user/ppl/functions/statistical.md @@ -0,0 +1,142 @@ +# Statistical Functions + +## MAX + +### Description + +Usage: max(x, y, ...) returns the maximum value from all provided arguments. Strings are treated as greater than numbers, so if provided both strings and numbers, it will return the maximum string value (lexicographically ordered) +Note: This function is only available in the eval command context. +Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments +Return type: Type of the selected argument +Example + +```ppl +source=accounts +| eval max_val = MAX(age, 30) +| fields age, max_val +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----+---------+ +| age | max_val | +|-----+---------| +| 32 | 32 | +| 36 | 36 | +| 28 | 30 | +| 33 | 33 | ++-----+---------+ +``` + +```ppl +source=accounts +| eval result = MAX(firstname, 'John') +| fields firstname, result +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+---------+ +| firstname | result | +|-----------+---------| +| Amber | John | +| Hattie | John | +| Nanette | Nanette | +| Dale | John | ++-----------+---------+ +``` + +```ppl +source=accounts +| eval result = MAX(age, 35, 'John', firstname) +| fields age, firstname, result +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----+-----------+---------+ +| age | firstname | result | +|-----+-----------+---------| +| 32 | Amber | John | +| 36 | Hattie | John | +| 28 | Nanette | Nanette | +| 33 | Dale | John | ++-----+-----------+---------+ +``` + +## MIN + +### Description + +Usage: min(x, y, ...) returns the minimum value from all provided arguments. Strings are treated as greater than numbers, so if provided both strings and numbers, it will return the minimum numeric value. +Note: This function is only available in the eval command context. 
+Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments +Return type: Type of the selected argument +Example + +```ppl +source=accounts +| eval min_val = MIN(age, 30) +| fields age, min_val +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----+---------+ +| age | min_val | +|-----+---------| +| 32 | 30 | +| 36 | 30 | +| 28 | 28 | +| 33 | 30 | ++-----+---------+ +``` + +```ppl +source=accounts +| eval result = MIN(firstname, 'John') +| fields firstname, result +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----------+--------+ +| firstname | result | +|-----------+--------| +| Amber | Amber | +| Hattie | Hattie | +| Nanette | John | +| Dale | Dale | ++-----------+--------+ +``` + +```ppl +source=accounts +| eval result = MIN(age, 35, firstname) +| fields age, firstname, result +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++-----+-----------+--------+ +| age | firstname | result | +|-----+-----------+--------| +| 32 | Amber | 32 | +| 36 | Hattie | 35 | +| 28 | Nanette | 28 | +| 33 | Dale | 33 | ++-----+-----------+--------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/statistical.rst b/docs/user/ppl/functions/statistical.rst deleted file mode 100644 index 3729c1991ca..00000000000 --- a/docs/user/ppl/functions/statistical.rst +++ /dev/null @@ -1,109 +0,0 @@ -====================== -Statistical Functions -====================== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -MAX ---- - -Description ->>>>>>>>>>> - -Usage: max(x, y, ...) returns the maximum value from all provided arguments. Strings are treated as greater than numbers, so if provided both strings and numbers, it will return the maximum string value (lexicographically ordered) - -Note: This function is only available in the eval command context. - -Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments - -Return type: Type of the selected argument - -Example:: - - os> source=accounts | eval max_val = MAX(age, 30) | fields age, max_val - fetched rows / total rows = 4/4 - +-----+---------+ - | age | max_val | - |-----+---------| - | 32 | 32 | - | 36 | 36 | - | 28 | 30 | - | 33 | 33 | - +-----+---------+ - - os> source=accounts | eval result = MAX(firstname, 'John') | fields firstname, result - fetched rows / total rows = 4/4 - +-----------+---------+ - | firstname | result | - |-----------+---------| - | Amber | John | - | Hattie | John | - | Nanette | Nanette | - | Dale | John | - +-----------+---------+ - - os> source=accounts | eval result = MAX(age, 35, 'John', firstname) | fields age, firstname, result - fetched rows / total rows = 4/4 - +-----+-----------+---------+ - | age | firstname | result | - |-----+-----------+---------| - | 32 | Amber | John | - | 36 | Hattie | John | - | 28 | Nanette | Nanette | - | 33 | Dale | John | - +-----+-----------+---------+ - - -MIN ---- - -Description ->>>>>>>>>>> - -Usage: min(x, y, ...) returns the minimum value from all provided arguments. Strings are treated as greater than numbers, so if provided both strings and numbers, it will return the minimum numeric value. - -Note: This function is only available in the eval command context. 
- -Argument type: Variable number of INTEGER/LONG/FLOAT/DOUBLE/STRING arguments - -Return type: Type of the selected argument - -Example:: - - os> source=accounts | eval min_val = MIN(age, 30) | fields age, min_val - fetched rows / total rows = 4/4 - +-----+---------+ - | age | min_val | - |-----+---------| - | 32 | 30 | - | 36 | 30 | - | 28 | 28 | - | 33 | 30 | - +-----+---------+ - - os> source=accounts | eval result = MIN(firstname, 'John') | fields firstname, result - fetched rows / total rows = 4/4 - +-----------+--------+ - | firstname | result | - |-----------+--------| - | Amber | Amber | - | Hattie | Hattie | - | Nanette | John | - | Dale | Dale | - +-----------+--------+ - - os> source=accounts | eval result = MIN(age, 35, firstname) | fields age, firstname, result - fetched rows / total rows = 4/4 - +-----+-----------+--------+ - | age | firstname | result | - |-----+-----------+--------| - | 32 | Amber | 32 | - | 36 | Hattie | 35 | - | 28 | Nanette | 28 | - | 33 | Dale | 33 | - +-----+-----------+--------+ \ No newline at end of file diff --git a/docs/user/ppl/functions/string.md b/docs/user/ppl/functions/string.md new file mode 100644 index 00000000000..04a3485c492 --- /dev/null +++ b/docs/user/ppl/functions/string.md @@ -0,0 +1,549 @@ +# String Functions + +## CONCAT + +### Description + +Usage: CONCAT(str1, str2, ...., str_9) adds up to 9 strings together. +Argument type: STRING, STRING, ...., STRING +Return type: STRING +Example + +```ppl +source=people +| eval `CONCAT('hello', 'world')` = CONCAT('hello', 'world'), `CONCAT('hello ', 'whole ', 'world', '!')` = CONCAT('hello ', 'whole ', 'world', '!') +| fields `CONCAT('hello', 'world')`, `CONCAT('hello ', 'whole ', 'world', '!')` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------------------+------------------------------------------+ +| CONCAT('hello', 'world') | CONCAT('hello ', 'whole ', 'world', '!') | +|--------------------------+------------------------------------------| +| helloworld | hello whole world! | ++--------------------------+------------------------------------------+ +``` + +## CONCAT_WS + +### Description + +Usage: CONCAT_WS(sep, str1, str2) returns str1 concatenated with str2 using sep as a separator between them. +Argument type: STRING, STRING, STRING +Return type: STRING +Example + +```ppl +source=people +| eval `CONCAT_WS(',', 'hello', 'world')` = CONCAT_WS(',', 'hello', 'world') +| fields `CONCAT_WS(',', 'hello', 'world')` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------------------+ +| CONCAT_WS(',', 'hello', 'world') | +|----------------------------------| +| hello,world | ++----------------------------------+ +``` + +## LENGTH + +### Description + +Specifications: +1. LENGTH(STRING) -> INTEGER + +Usage: length(str) returns length of string measured in bytes. +Argument type: STRING +Return type: INTEGER +Example + +```ppl +source=people +| eval `LENGTH('helloworld')` = LENGTH('helloworld') +| fields `LENGTH('helloworld')` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++----------------------+ +| LENGTH('helloworld') | +|----------------------| +| 10 | ++----------------------+ +``` + +## LIKE + +### Description + +Usage: like(string, PATTERN[, case_sensitive]) return true if the string match the PATTERN. `case_sensitive` is optional. When set to `true`, PATTERN is **case-sensitive**. **Default:** Determined by `plugins.ppl.syntax.legacy.preferred`. 
 * When `plugins.ppl.syntax.legacy.preferred=true`, `case_sensitive` defaults to `false`
 * When `plugins.ppl.syntax.legacy.preferred=false`, `case_sensitive` defaults to `true`

There are two wildcards often used in conjunction with the LIKE operator:
* `%` - The percent sign represents zero, one, or multiple characters
* `_` - The underscore represents a single character

Argument type: STRING, STRING [, BOOLEAN]
Return type: INTEGER
Example

```ppl
source=people
| eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ello%'), `LIKE('hello world', '_ELLo%', true)` = LIKE('hello world', '_ELLo%', true), `LIKE('hello world', '_ELLo%', false)` = LIKE('hello world', '_ELLo%', false)
| fields `LIKE('hello world', '_ello%')`, `LIKE('hello world', '_ELLo%', true)`, `LIKE('hello world', '_ELLo%', false)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------------+-------------------------------------+--------------------------------------+
| LIKE('hello world', '_ello%') | LIKE('hello world', '_ELLo%', true) | LIKE('hello world', '_ELLo%', false) |
|-------------------------------+-------------------------------------+--------------------------------------|
| True | False | True |
+-------------------------------+-------------------------------------+--------------------------------------+
```

Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields.

## ILIKE

### Description

Usage: ilike(string, PATTERN) returns true if the string matches PATTERN; PATTERN is **case-insensitive**.
There are two wildcards often used in conjunction with the ILIKE operator:
* `%` - The percent sign represents zero, one, or multiple characters
* `_` - The underscore represents a single character

Argument type: STRING, STRING
Return type: INTEGER
Example

```ppl
source=people
| eval `ILIKE('hello world', '_ELLo%')` = ILIKE('hello world', '_ELLo%')
| fields `ILIKE('hello world', '_ELLo%')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------------+
| ILIKE('hello world', '_ELLo%') |
|--------------------------------|
| True |
+--------------------------------+
```

Limitation: The pushdown of the ILIKE function to a DSL wildcard query is supported only for keyword fields.
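Since ILIKE behaves like LIKE with `case_sensitive` set to `false`, the two calls below should agree. This is a minimal sketch reusing the literals from the examples above, not an additional documented case.

```ppl
source=people
| eval `ilike` = ILIKE('hello world', '_ELLo%'), `like_ci` = LIKE('hello world', '_ELLo%', false)
| fields `ilike`, `like_ci`
```

Both columns are expected to be True, per the LIKE and ILIKE examples above.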
+
## LOCATE

### Description

Usage: locate(substr, str[, start]) returns the position of the first occurrence of substring substr in string str, starting the search from position start. If start is not specified, it defaults to 1 (the beginning of the string). Returns 0 if substr is not found. If any argument is NULL, the function returns NULL.
Argument type: STRING, STRING[, INTEGER]
Return type: INTEGER
Example

```ppl
source=people
| eval `LOCATE('world', 'helloworld')` = LOCATE('world', 'helloworld'), `LOCATE('invalid', 'helloworld')` = LOCATE('invalid', 'helloworld'), `LOCATE('world', 'helloworld', 6)` = LOCATE('world', 'helloworld', 6)
| fields `LOCATE('world', 'helloworld')`, `LOCATE('invalid', 'helloworld')`, `LOCATE('world', 'helloworld', 6)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------------------+---------------------------------+----------------------------------+
| LOCATE('world', 'helloworld') | LOCATE('invalid', 'helloworld') | LOCATE('world', 'helloworld', 6) |
|-------------------------------+---------------------------------+----------------------------------|
| 6 | 0 | 6 |
+-------------------------------+---------------------------------+----------------------------------+
```

## LOWER

### Description

Usage: lower(string) converts the string to lowercase.
Argument type: STRING
Return type: STRING
Example

```ppl
source=people
| eval `LOWER('helloworld')` = LOWER('helloworld'), `LOWER('HELLOWORLD')` = LOWER('HELLOWORLD')
| fields `LOWER('helloworld')`, `LOWER('HELLOWORLD')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------+---------------------+
| LOWER('helloworld') | LOWER('HELLOWORLD') |
|---------------------+---------------------|
| helloworld | helloworld |
+---------------------+---------------------+
```

## LTRIM

### Description

Usage: ltrim(str) trims leading space characters from the string.
Argument type: STRING
Return type: STRING
Example

```ppl
source=people
| eval `LTRIM(' hello')` = LTRIM(' hello'), `LTRIM('hello ')` = LTRIM('hello ')
| fields `LTRIM(' hello')`, `LTRIM('hello ')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-------------------+-------------------+
| LTRIM(' hello') | LTRIM('hello ') |
|-------------------+-------------------|
| hello | hello |
+-------------------+-------------------+
```

## POSITION

### Description

Usage: The syntax POSITION(substr IN str) returns the position of the first occurrence of substring substr in string str. Returns 0 if substr is not in str. Returns NULL if any argument is NULL.
Argument type: STRING, STRING
Return type: INTEGER
Specification: (STRING IN STRING) -> INTEGER
Example

```ppl
source=people
| eval `POSITION('world' IN 'helloworld')` = POSITION('world' IN 'helloworld'), `POSITION('invalid' IN 'helloworld')`= POSITION('invalid' IN 'helloworld')
| fields `POSITION('world' IN 'helloworld')`, `POSITION('invalid' IN 'helloworld')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------------+-------------------------------------+
| POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') |
|-----------------------------------+-------------------------------------|
| 6 | 0 |
+-----------------------------------+-------------------------------------+
```
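For readers comparing this with LOCATE above: the two-argument forms find the same first occurrence, so the two columns below should match. This is a minimal sketch reusing the literals already documented, not an additional documented case.

```ppl
source=people
| eval `via_position` = POSITION('world' IN 'helloworld'), `via_locate` = LOCATE('world', 'helloworld')
| fields `via_position`, `via_locate`
```

Both values are expected to be 6, per the POSITION and LOCATE examples above.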
+
## REPLACE

### Description

Usage: replace(str, pattern, replacement) returns a string with all occurrences of the pattern replaced by the replacement string in str. If any argument is NULL, the function returns NULL.
Argument type: STRING, STRING (regex pattern), STRING (replacement)
Return type: STRING
**Regular Expression Support**: The pattern argument supports Java regex syntax, including character classes, quantifiers, anchors, capture groups, and backreferences, as the examples below demonstrate.
**Important - Regex Special Characters**: The pattern is interpreted as a regular expression. Characters like `.`, `*`, `+`, `[`, `]`, `(`, `)`, `{`, `}`, `^`, `$`, `|`, `?`, and `\` have special meaning in regex. To match them literally, escape with backslashes:
* To match `example.com`: use `'example\\.com'` (escape the dots)
* To match `value*`: use `'value\\*'` (escape the asterisk)
* To match `price+tax`: use `'price\\+tax'` (escape the plus)

For strings with many special characters, use `\\Q...\\E` to quote the entire literal string (e.g., `'\\Qhttps://example.com/path?id=123\\E'` matches that exact URL).
Literal String Replacement Examples

```ppl
source=people
| eval `REPLACE('helloworld', 'world', 'universe')` = REPLACE('helloworld', 'world', 'universe'), `REPLACE('helloworld', 'invalid', 'universe')` = REPLACE('helloworld', 'invalid', 'universe')
| fields `REPLACE('helloworld', 'world', 'universe')`, `REPLACE('helloworld', 'invalid', 'universe')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+--------------------------------------------+----------------------------------------------+
| REPLACE('helloworld', 'world', 'universe') | REPLACE('helloworld', 'invalid', 'universe') |
|--------------------------------------------+----------------------------------------------|
| hellouniverse | helloworld |
+--------------------------------------------+----------------------------------------------+
```

Escaping Special Characters Examples

```ppl
source=people
| eval `Replace domain` = REPLACE('api.example.com', 'example\\.com', 'newsite.org'), `Replace with quote` = REPLACE('https://api.example.com/v1', '\\Qhttps://api.example.com\\E', 'http://localhost:8080')
| fields `Replace domain`, `Replace with quote`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------+--------------------------+
| Replace domain | Replace with quote |
|-----------------+--------------------------|
| api.newsite.org | http://localhost:8080/v1 |
+-----------------+--------------------------+
```

Regex Pattern Examples

```ppl
source=people
| eval `Remove digits` = REPLACE('test123', '\\d+', ''), `Collapse spaces` = REPLACE('hello world', ' +', ' '), `Remove special` = REPLACE('hello@world!', '[^a-zA-Z]', '')
| fields `Remove digits`, `Collapse spaces`, `Remove special`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------+-----------------+----------------+
| Remove digits | Collapse spaces | Remove special |
|---------------+-----------------+----------------|
| test | hello world | helloworld |
+---------------+-----------------+----------------+
```

Capture Group and Backreference Examples

```ppl
source=people
| eval `Swap date` = REPLACE('1/14/2023', '^(\\d{1,2})/(\\d{1,2})/', '$2/$1/'), `Reverse words` = REPLACE('Hello World', '(\\w+) (\\w+)', '$2 $1'), `Extract domain` = REPLACE('user@example.com', '.*@(.+)', '$1')
| fields `Swap date`, `Reverse words`, `Extract domain`
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+---------------+----------------+
| Swap date | Reverse words | Extract domain |
|-----------+---------------+----------------|
| 14/1/2023 | World Hello | example.com |
+-----------+---------------+----------------+
```

Advanced
Regex Examples + +```ppl +source=people +| eval `Clean phone` = REPLACE('(555) 123-4567', '[^0-9]', ''), `Remove vowels` = REPLACE('hello world', '[aeiou]', ''), `Add prefix` = REPLACE('test', '^', 'pre_') +| fields `Clean phone`, `Remove vowels`, `Add prefix` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------+---------------+------------+ +| Clean phone | Remove vowels | Add prefix | +|-------------+---------------+------------| +| 5551234567 | hll wrld | pre_test | ++-------------+---------------+------------+ +``` + +**Note**: When using regex patterns in PPL queries: +* Backslashes must be escaped (use `\\` instead of `\`) - e.g., `\\d` for digit pattern, `\\w+` for word characters +* Backreferences support both PCRE-style (`\1`, `\2`, etc.) and Java-style (`$1`, `$2`, etc.) syntax. PCRE-style backreferences are automatically converted to Java-style internally. + +## REVERSE + +### Description + +Usage: REVERSE(str) returns reversed string of the string supplied as an argument. +Argument type: STRING +Return type: STRING +Example + +```ppl +source=people +| eval `REVERSE('abcde')` = REVERSE('abcde') +| fields `REVERSE('abcde')` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------------------+ +| REVERSE('abcde') | +|------------------| +| edcba | ++------------------+ +``` + +## RIGHT + +### Description + +Usage: right(str, len) returns the rightmost len characters from the string str, or NULL if any argument is NULL. +Argument type: STRING, INTEGER +Return type: STRING +Example + +```ppl +source=people +| eval `RIGHT('helloworld', 5)` = RIGHT('helloworld', 5), `RIGHT('HELLOWORLD', 0)` = RIGHT('HELLOWORLD', 0) +| fields `RIGHT('helloworld', 5)`, `RIGHT('HELLOWORLD', 0)` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++------------------------+------------------------+ +| RIGHT('helloworld', 5) | RIGHT('HELLOWORLD', 0) | +|------------------------+------------------------| +| world | | ++------------------------+------------------------+ +``` + +## RTRIM + +### Description + +Usage: rtrim(str) trims trailing space characters from the string. +Argument type: STRING +Return type: STRING +Example + +```ppl +source=people +| eval `RTRIM(' hello')` = RTRIM(' hello'), `RTRIM('hello ')` = RTRIM('hello ') +| fields `RTRIM(' hello')`, `RTRIM('hello ')` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++-------------------+-------------------+ +| RTRIM(' hello') | RTRIM('hello ') | +|-------------------+-------------------| +| hello | hello | ++-------------------+-------------------+ +``` + +## SUBSTRING + +### Description + +Usage: substring(str, start) or substring(str, start, length) returns substring using start and length. With no length, entire string from start is returned. 
+
Argument type: STRING, INTEGER, INTEGER
Return type: STRING
Synonyms: SUBSTR
Example

```ppl
source=people
| eval `SUBSTRING('helloworld', 5)` = SUBSTRING('helloworld', 5), `SUBSTRING('helloworld', 5, 3)` = SUBSTRING('helloworld', 5, 3)
| fields `SUBSTRING('helloworld', 5)`, `SUBSTRING('helloworld', 5, 3)`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------------------+-------------------------------+
| SUBSTRING('helloworld', 5) | SUBSTRING('helloworld', 5, 3) |
|----------------------------+-------------------------------|
| oworld | owo |
+----------------------------+-------------------------------+
```

## TRIM

### Description

Usage: trim(str) removes leading and trailing space characters from the string.
Argument type: STRING
Return type: STRING
Example

```ppl
source=people
| eval `TRIM(' hello')` = TRIM(' hello'), `TRIM('hello ')` = TRIM('hello ')
| fields `TRIM(' hello')`, `TRIM('hello ')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+------------------+------------------+
| TRIM(' hello') | TRIM('hello ') |
|------------------+------------------|
| hello | hello |
+------------------+------------------+
```

## UPPER

### Description

Usage: upper(string) converts the string to uppercase.
Argument type: STRING
Return type: STRING
Example

```ppl
source=people
| eval `UPPER('helloworld')` = UPPER('helloworld'), `UPPER('HELLOWORLD')` = UPPER('HELLOWORLD')
| fields `UPPER('helloworld')`, `UPPER('HELLOWORLD')`
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------------------+---------------------+
| UPPER('helloworld') | UPPER('HELLOWORLD') |
|---------------------+---------------------|
| HELLOWORLD | HELLOWORLD |
+---------------------+---------------------+
```

## REGEXP_REPLACE

### Description

Usage: regexp_replace(str, pattern, replacement) replaces all substrings of the string value that match pattern with replacement and returns the modified string value.
Argument type: STRING, STRING, STRING
Return type: STRING
Synonyms: [REPLACE](#replace)
Example

```ppl
source=people
| eval `DOMAIN` = REGEXP_REPLACE('https://opensearch.org/downloads/', '^https?://(?:www\.)?([^/]+)/.*$', '\1')
| fields `DOMAIN`
```

Expected output:

```text
fetched rows / total rows = 1/1
+----------------+
| DOMAIN |
|----------------|
| opensearch.org |
+----------------+
```
 
\ No newline at end of file
diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst
deleted file mode 100644
index 3e94d220094..00000000000
--- a/docs/user/ppl/functions/string.rst
+++ /dev/null
@@ -1,479 +0,0 @@
-================
-String Functions
-================
-
-.. rubric:: Table of contents
-
-.. contents::
- :local:
- :depth: 1
-
-CONCAT
-------
-
-Description
->>>>>>>>>>>
-
-Usage: CONCAT(str1, str2, ...., str_9) adds up to 9 strings together.
-
-Argument type: STRING, STRING, ...., STRING
-
-Return type: STRING
-
-Example::
-
- os> source=people | eval `CONCAT('hello', 'world')` = CONCAT('hello', 'world'), `CONCAT('hello ', 'whole ', 'world', '!')` = CONCAT('hello ', 'whole ', 'world', '!') | fields `CONCAT('hello', 'world')`, `CONCAT('hello ', 'whole ', 'world', '!')`
- fetched rows / total rows = 1/1
- +--------------------------+------------------------------------------+
- | CONCAT('hello', 'world') | CONCAT('hello ', 'whole ', 'world', '!') |
- |--------------------------+------------------------------------------|
- | helloworld | hello whole world!
| - +--------------------------+------------------------------------------+ - - -CONCAT_WS ---------- - -Description ->>>>>>>>>>> - -Usage: CONCAT_WS(sep, str1, str2) returns str1 concatenated with str2 using sep as a separator between them. - -Argument type: STRING, STRING, STRING - -Return type: STRING - -Example:: - - os> source=people | eval `CONCAT_WS(',', 'hello', 'world')` = CONCAT_WS(',', 'hello', 'world') | fields `CONCAT_WS(',', 'hello', 'world')` - fetched rows / total rows = 1/1 - +----------------------------------+ - | CONCAT_WS(',', 'hello', 'world') | - |----------------------------------| - | hello,world | - +----------------------------------+ - - -LENGTH ------- - -Description ->>>>>>>>>>> - -Specifications: - -1. LENGTH(STRING) -> INTEGER - -Usage: length(str) returns length of string measured in bytes. - -Argument type: STRING - -Return type: INTEGER - -Example:: - - os> source=people | eval `LENGTH('helloworld')` = LENGTH('helloworld') | fields `LENGTH('helloworld')` - fetched rows / total rows = 1/1 - +----------------------+ - | LENGTH('helloworld') | - |----------------------| - | 10 | - +----------------------+ - -LIKE ----- - -Description ->>>>>>>>>>> - -Usage: like(string, PATTERN[, case_sensitive]) return true if the string match the PATTERN. ``case_sensitive`` is optional. When set to ``true``, PATTERN is **case-sensitive**. **Default:** Determined by ``plugins.ppl.syntax.legacy.preferred``. - - * When ``plugins.ppl.syntax.legacy.preferred=true``, ``case_sensitive`` defaults to ``false`` - * When ``plugins.ppl.syntax.legacy.preferred=false``, ``case_sensitive`` defaults to ``true`` - -There are two wildcards often used in conjunction with the LIKE operator: - -* ``%`` - The percent sign represents zero, one, or multiple characters -* ``_`` - The underscore represents a single character - -Argument type: STRING, STRING [, BOOLEAN] - -Return type: INTEGER - -Example:: - - os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ello%'), `LIKE('hello world', '_ELLo%', true)` = LIKE('hello world', '_ELLo%', true), `LIKE('hello world', '_ELLo%', false)` = LIKE('hello world', '_ELLo%', false) | fields `LIKE('hello world', '_ello%')`, `LIKE('hello world', '_ELLo%', true)`, `LIKE('hello world', '_ELLo%', false)` - fetched rows / total rows = 1/1 - +-------------------------------+-------------------------------------+--------------------------------------+ - | LIKE('hello world', '_ello%') | LIKE('hello world', '_ELLo%', true) | LIKE('hello world', '_ELLo%', false) | - |-------------------------------+-------------------------------------+--------------------------------------| - | True | False | True | - +-------------------------------+-------------------------------------+--------------------------------------+ - - -Limitation: The pushdown of the LIKE function to a DSL wildcard query is supported only for keyword fields. - -ILIKE ----- - -Description ->>>>>>>>>>> - -Usage: ilike(string, PATTERN) return true if the string match the PATTERN, PATTERN is **case-insensitive**. 
- -There are two wildcards often used in conjunction with the ILIKE operator: - -* ``%`` - The percent sign represents zero, one, or multiple characters -* ``_`` - The underscore represents a single character - -Argument type: STRING, STRING - -Return type: INTEGER - -Example:: - - os> source=people | eval `ILIKE('hello world', '_ELLo%')` = ILIKE('hello world', '_ELLo%') | fields `ILIKE('hello world', '_ELLo%')` - fetched rows / total rows = 1/1 - +--------------------------------+ - | ILIKE('hello world', '_ELLo%') | - |--------------------------------| - | True | - +--------------------------------+ - - -Limitation: The pushdown of the ILIKE function to a DSL wildcard query is supported only for keyword fields. - -LOCATE -------- - -Description ->>>>>>>>>>> - -Usage: locate(substr, str[, start]) returns the position of the first occurrence of substring substr in string str, starting searching from position start. If start is not specified, it defaults to 1 (the beginning of the string). Returns 0 if substr is not found. If any argument is NULL, the function returns NULL. - -Argument type: STRING, STRING[, INTEGER] - -Return type: INTEGER - -Example:: - - os> source=people | eval `LOCATE('world', 'helloworld')` = LOCATE('world', 'helloworld'), `LOCATE('invalid', 'helloworld')` = LOCATE('invalid', 'helloworld'), `LOCATE('world', 'helloworld', 6)` = LOCATE('world', 'helloworld', 6) | fields `LOCATE('world', 'helloworld')`, `LOCATE('invalid', 'helloworld')`, `LOCATE('world', 'helloworld', 6)` - fetched rows / total rows = 1/1 - +-------------------------------+---------------------------------+----------------------------------+ - | LOCATE('world', 'helloworld') | LOCATE('invalid', 'helloworld') | LOCATE('world', 'helloworld', 6) | - |-------------------------------+---------------------------------+----------------------------------| - | 6 | 0 | 6 | - +-------------------------------+---------------------------------+----------------------------------+ - - -LOWER ------ - -Description ->>>>>>>>>>> - -Usage: lower(string) converts the string to lowercase. - -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `LOWER('helloworld')` = LOWER('helloworld'), `LOWER('HELLOWORLD')` = LOWER('HELLOWORLD') | fields `LOWER('helloworld')`, `LOWER('HELLOWORLD')` - fetched rows / total rows = 1/1 - +---------------------+---------------------+ - | LOWER('helloworld') | LOWER('HELLOWORLD') | - |---------------------+---------------------| - | helloworld | helloworld | - +---------------------+---------------------+ - - -LTRIM ------ - -Description ->>>>>>>>>>> - -Usage: ltrim(str) trims leading space characters from the string. - -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `LTRIM(' hello')` = LTRIM(' hello'), `LTRIM('hello ')` = LTRIM('hello ') | fields `LTRIM(' hello')`, `LTRIM('hello ')` - fetched rows / total rows = 1/1 - +-------------------+-------------------+ - | LTRIM(' hello') | LTRIM('hello ') | - |-------------------+-------------------| - | hello | hello | - +-------------------+-------------------+ - - -POSITION --------- - -Description ->>>>>>>>>>> - -Usage: The syntax POSITION(substr IN str) returns the position of the first occurrence of substring substr in string str. Returns 0 if substr is not in str. Returns NULL if any argument is NULL. 
- -Argument type: STRING, STRING - -Return type INTEGER - -(STRING IN STRING) -> INTEGER - -Example:: - - os> source=people | eval `POSITION('world' IN 'helloworld')` = POSITION('world' IN 'helloworld'), `POSITION('invalid' IN 'helloworld')`= POSITION('invalid' IN 'helloworld') | fields `POSITION('world' IN 'helloworld')`, `POSITION('invalid' IN 'helloworld')` - fetched rows / total rows = 1/1 - +-----------------------------------+-------------------------------------+ - | POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') | - |-----------------------------------+-------------------------------------| - | 6 | 0 | - +-----------------------------------+-------------------------------------+ - - -REPLACE --------- - -Description ->>>>>>>>>>> - -Usage: replace(str, pattern, replacement) returns a string with all occurrences of the pattern replaced by the replacement string in str. If any argument is NULL, the function returns NULL. - -**Regular Expression Support**: The pattern argument supports Java regex syntax, including: - -Argument type: STRING, STRING (regex pattern), STRING (replacement) - -Return type: STRING - -**Important - Regex Special Characters**: The pattern is interpreted as a regular expression. Characters like ``.``, ``*``, ``+``, ``[``, ``]``, ``(``, ``)``, ``{``, ``}``, ``^``, ``$``, ``|``, ``?``, and ``\`` have special meaning in regex. To match them literally, escape with backslashes: - -* To match ``example.com``: use ``'example\\.com'`` (escape the dots) -* To match ``value*``: use ``'value\\*'`` (escape the asterisk) -* To match ``price+tax``: use ``'price\\+tax'`` (escape the plus) - -For strings with many special characters, use ``\\Q...\\E`` to quote the entire literal string (e.g., ``'\\Qhttps://example.com/path?id=123\\E'`` matches that exact URL). 
- -Literal String Replacement Examples:: - - os> source=people | eval `REPLACE('helloworld', 'world', 'universe')` = REPLACE('helloworld', 'world', 'universe'), `REPLACE('helloworld', 'invalid', 'universe')` = REPLACE('helloworld', 'invalid', 'universe') | fields `REPLACE('helloworld', 'world', 'universe')`, `REPLACE('helloworld', 'invalid', 'universe')` - fetched rows / total rows = 1/1 - +--------------------------------------------+----------------------------------------------+ - | REPLACE('helloworld', 'world', 'universe') | REPLACE('helloworld', 'invalid', 'universe') | - |--------------------------------------------+----------------------------------------------| - | hellouniverse | helloworld | - +--------------------------------------------+----------------------------------------------+ - -Escaping Special Characters Examples:: - - os> source=people | eval `Replace domain` = REPLACE('api.example.com', 'example\\.com', 'newsite.org'), `Replace with quote` = REPLACE('https://api.example.com/v1', '\\Qhttps://api.example.com\\E', 'http://localhost:8080') | fields `Replace domain`, `Replace with quote` - fetched rows / total rows = 1/1 - +-----------------+--------------------------+ - | Replace domain | Replace with quote | - |-----------------+--------------------------| - | api.newsite.org | http://localhost:8080/v1 | - +-----------------+--------------------------+ - -Regex Pattern Examples:: - - os> source=people | eval `Remove digits` = REPLACE('test123', '\\d+', ''), `Collapse spaces` = REPLACE('hello world', ' +', ' '), `Remove special` = REPLACE('hello@world!', '[^a-zA-Z]', '') | fields `Remove digits`, `Collapse spaces`, `Remove special` - fetched rows / total rows = 1/1 - +---------------+-----------------+----------------+ - | Remove digits | Collapse spaces | Remove special | - |---------------+-----------------+----------------| - | test | hello world | helloworld | - +---------------+-----------------+----------------+ - -Capture Group and Backreference Examples:: - - os> source=people | eval `Swap date` = REPLACE('1/14/2023', '^(\\d{1,2})/(\\d{1,2})/', '$2/$1/'), `Reverse words` = REPLACE('Hello World', '(\\w+) (\\w+)', '$2 $1'), `Extract domain` = REPLACE('user@example.com', '.*@(.+)', '$1') | fields `Swap date`, `Reverse words`, `Extract domain` - fetched rows / total rows = 1/1 - +-----------+---------------+----------------+ - | Swap date | Reverse words | Extract domain | - |-----------+---------------+----------------| - | 14/1/2023 | World Hello | example.com | - +-----------+---------------+----------------+ - -Advanced Regex Examples:: - - os> source=people | eval `Clean phone` = REPLACE('(555) 123-4567', '[^0-9]', ''), `Remove vowels` = REPLACE('hello world', '[aeiou]', ''), `Add prefix` = REPLACE('test', '^', 'pre_') | fields `Clean phone`, `Remove vowels`, `Add prefix` - fetched rows / total rows = 1/1 - +-------------+---------------+------------+ - | Clean phone | Remove vowels | Add prefix | - |-------------+---------------+------------| - | 5551234567 | hll wrld | pre_test | - +-------------+---------------+------------+ - -**Note**: When using regex patterns in PPL queries: - -* Backslashes must be escaped (use ``\\`` instead of ``\``) - e.g., ``\\d`` for digit pattern, ``\\w+`` for word characters -* Backreferences support both PCRE-style (``\1``, ``\2``, etc.) and Java-style (``$1``, ``$2``, etc.) syntax. PCRE-style backreferences are automatically converted to Java-style internally. 
- - -REVERSE -------- - -Description ->>>>>>>>>>> - -Usage: REVERSE(str) returns reversed string of the string supplied as an argument. - -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `REVERSE('abcde')` = REVERSE('abcde') | fields `REVERSE('abcde')` - fetched rows / total rows = 1/1 - +------------------+ - | REVERSE('abcde') | - |------------------| - | edcba | - +------------------+ - - -RIGHT ------ - -Description ->>>>>>>>>>> - -Usage: right(str, len) returns the rightmost len characters from the string str, or NULL if any argument is NULL. - -Argument type: STRING, INTEGER - -Return type: STRING - -Example:: - - os> source=people | eval `RIGHT('helloworld', 5)` = RIGHT('helloworld', 5), `RIGHT('HELLOWORLD', 0)` = RIGHT('HELLOWORLD', 0) | fields `RIGHT('helloworld', 5)`, `RIGHT('HELLOWORLD', 0)` - fetched rows / total rows = 1/1 - +------------------------+------------------------+ - | RIGHT('helloworld', 5) | RIGHT('HELLOWORLD', 0) | - |------------------------+------------------------| - | world | | - +------------------------+------------------------+ - - -RTRIM ------ - -Description ->>>>>>>>>>> - -Usage: rtrim(str) trims trailing space characters from the string. - -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `RTRIM(' hello')` = RTRIM(' hello'), `RTRIM('hello ')` = RTRIM('hello ') | fields `RTRIM(' hello')`, `RTRIM('hello ')` - fetched rows / total rows = 1/1 - +-------------------+-------------------+ - | RTRIM(' hello') | RTRIM('hello ') | - |-------------------+-------------------| - | hello | hello | - +-------------------+-------------------+ - - -SUBSTRING ---------- - -Description ->>>>>>>>>>> - -Usage: substring(str, start) or substring(str, start, length) returns substring using start and length. With no length, entire string from start is returned. - -Argument type: STRING, INTEGER, INTEGER - -Return type: STRING - -Synonyms: SUBSTR - -Example:: - - os> source=people | eval `SUBSTRING('helloworld', 5)` = SUBSTRING('helloworld', 5), `SUBSTRING('helloworld', 5, 3)` = SUBSTRING('helloworld', 5, 3) | fields `SUBSTRING('helloworld', 5)`, `SUBSTRING('helloworld', 5, 3)` - fetched rows / total rows = 1/1 - +----------------------------+-------------------------------+ - | SUBSTRING('helloworld', 5) | SUBSTRING('helloworld', 5, 3) | - |----------------------------+-------------------------------| - | oworld | owo | - +----------------------------+-------------------------------+ - - -TRIM ----- - -Description ->>>>>>>>>>> - -Argument Type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `TRIM(' hello')` = TRIM(' hello'), `TRIM('hello ')` = TRIM('hello ') | fields `TRIM(' hello')`, `TRIM('hello ')` - fetched rows / total rows = 1/1 - +------------------+------------------+ - | TRIM(' hello') | TRIM('hello ') | - |------------------+------------------| - | hello | hello | - +------------------+------------------+ - - -UPPER ------ - -Description ->>>>>>>>>>> - -Usage: upper(string) converts the string to uppercase. 
- -Argument type: STRING - -Return type: STRING - -Example:: - - os> source=people | eval `UPPER('helloworld')` = UPPER('helloworld'), `UPPER('HELLOWORLD')` = UPPER('HELLOWORLD') | fields `UPPER('helloworld')`, `UPPER('HELLOWORLD')` - fetched rows / total rows = 1/1 - +---------------------+---------------------+ - | UPPER('helloworld') | UPPER('HELLOWORLD') | - |---------------------+---------------------| - | HELLOWORLD | HELLOWORLD | - +---------------------+---------------------+ - - -REGEXP_REPLACE -------------- - -Description ->>>>>>>>>>> - -Usage: regexp_replace(str, pattern, replacement) replace all substrings of the string value that match pattern with replacement and returns modified string value. - -Argument type: STRING, STRING, STRING - -Return type: STRING - -Synonyms: `REPLACE`_ - -Example:: - - os> source=people | eval `DOMAIN` = REGEXP_REPLACE('https://opensearch.org/downloads/', '^https?://(?:www\.)?([^/]+)/.*$', '\1') | fields `DOMAIN` - fetched rows / total rows = 1/1 - +----------------+ - | DOMAIN | - |----------------| - | opensearch.org | - +----------------+ - diff --git a/docs/user/ppl/functions/system.md b/docs/user/ppl/functions/system.md new file mode 100644 index 00000000000..4eb2aeb8114 --- /dev/null +++ b/docs/user/ppl/functions/system.md @@ -0,0 +1,29 @@ +# System Functions + +## TYPEOF + +### Description + +Usage: typeof(expr) function returns name of the data type of the value that is passed to it. This can be helpful for troubleshooting or dynamically constructing SQL queries. +Argument type: ANY +Return type: STRING + +Example + +```ppl +source=people +| eval `typeof(date)` = typeof(DATE('2008-04-14')), `typeof(int)` = typeof(1), `typeof(now())` = typeof(now()), `typeof(column)` = typeof(accounts) +| fields `typeof(date)`, `typeof(int)`, `typeof(now())`, `typeof(column)` +``` + +Expected output: + +```text +fetched rows / total rows = 1/1 ++--------------+-------------+---------------+----------------+ +| typeof(date) | typeof(int) | typeof(now()) | typeof(column) | +|--------------+-------------+---------------+----------------| +| DATE | INT | TIMESTAMP | STRUCT | ++--------------+-------------+---------------+----------------+ +``` + \ No newline at end of file diff --git a/docs/user/ppl/functions/system.rst b/docs/user/ppl/functions/system.rst deleted file mode 100644 index 698933a3c47..00000000000 --- a/docs/user/ppl/functions/system.rst +++ /dev/null @@ -1,31 +0,0 @@ -================ -System Functions -================ - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -TYPEOF ------- - -Description ->>>>>>>>>>> - -Usage: typeof(expr) function returns name of the data type of the value that is passed to it. This can be helpful for troubleshooting or dynamically constructing SQL queries. 
-
-Argument type: ANY
-
-Return type: STRING
-
-Example::
-
- os> source=people | eval `typeof(date)` = typeof(DATE('2008-04-14')), `typeof(int)` = typeof(1), `typeof(now())` = typeof(now()), `typeof(column)` = typeof(accounts) | fields `typeof(date)`, `typeof(int)`, `typeof(now())`, `typeof(column)`
- fetched rows / total rows = 1/1
- +--------------+-------------+---------------+----------------+
- | typeof(date) | typeof(int) | typeof(now()) | typeof(column) |
- |--------------+-------------+---------------+----------------|
- | DATE | INTEGER | TIMESTAMP | OBJECT |
- +--------------+-------------+---------------+----------------+
diff --git a/docs/user/ppl/general/comments.md b/docs/user/ppl/general/comments.md
new file mode 100644
index 00000000000..224682ee0b4
--- /dev/null
+++ b/docs/user/ppl/general/comments.md
@@ -0,0 +1,49 @@
+# Comments

Comments are text that is not evaluated. PPL supports both line comments and block comments.

## Line Comments

Line comments begin with two slashes ( // ) and end with a new line.
Example

```ppl
source=accounts
| top gender // finds most common gender of all the accounts
```

Expected output:

```text
fetched rows / total rows = 2/2
+--------+-------+
| gender | count |
|--------+-------|
| M | 3 |
| F | 1 |
+--------+-------+
```

## Block Comments

Block comments begin with a slash followed by an asterisk ( /\* ) and end with an asterisk followed by a slash ( \*/ ).
Example

```ppl
source=accounts
| dedup 2 gender /* dedup the document with gender field keep 2 duplication */
| fields account_number, gender
```

Expected output:

```text
fetched rows / total rows = 3/3
+----------------+--------+
| account_number | gender |
|----------------+--------|
| 13 | F |
| 1 | M |
| 6 | M |
+----------------+--------+
```
 
\ No newline at end of file
diff --git a/docs/user/ppl/general/comments.rst b/docs/user/ppl/general/comments.rst
deleted file mode 100644
index a0994e970c0..00000000000
--- a/docs/user/ppl/general/comments.rst
+++ /dev/null
@@ -1,44 +0,0 @@
-========
-Comments
-========
-
-.. rubric:: Table of contents
-
-.. contents::
- :local:
- :depth: 2
-
-
-Comments are not evaluated texts. PPL supports both line comments and block comments.
-
-Line Comments
--------------
-Line comments begin with two slashes ( // ) and end with a new line.
-
-Example::
-
- os> source=accounts | top gender // finds most common gender of all the accounts
- fetched rows / total rows = 2/2
- +----------+
- | gender |
- |----------|
- | M |
- | F |
- +----------+
-
-Block Comments
---------------
-Block comments begin with a slash followed by an asterisk ( /\* ) and end with an asterisk followed by a slash ( \*/ ).
-
-Example::
-
- os> source=accounts | dedup 2 gender /* dedup the document with gender field keep 2 duplication */ | fields account_number, gender
- fetched rows / total rows = 3/3
- +------------------+----------+
- | account_number | gender |
- |------------------+----------|
- | 1 | M |
- | 6 | M |
- | 13 | F |
- +------------------+----------+
-
diff --git a/docs/user/ppl/general/datatypes.md b/docs/user/ppl/general/datatypes.md
new file mode 100644
index 00000000000..b9e1cac1249
--- /dev/null
+++ b/docs/user/ppl/general/datatypes.md
@@ -0,0 +1,327 @@
+# Data Types

## Overview

### PPL Data Types

PPL supports the following data types.
+
| PPL Data Type |
| --- |
| boolean |
| tinyint |
| smallint |
| int |
| bigint |
| float |
| double |
| string |
| timestamp |
| date |
| time |
| interval |
| ip |
| geo_point |
| binary |
| struct |
| array |

### Data Types Mapping

The table below lists the mapping between the OpenSearch data type, the PPL data type, and the SQL type.

| OpenSearch Type | PPL Type | SQL Type |
| --- | --- | --- |
| boolean | boolean | BOOLEAN |
| byte | tinyint | TINYINT |
| short | smallint | SMALLINT |
| integer | int | INTEGER |
| long | bigint | BIGINT |
| float | float | REAL |
| half_float | float | FLOAT |
| scaled_float | float | DOUBLE |
| double | double | DOUBLE |
| keyword | string | VARCHAR |
| text | string | VARCHAR |
| match_only_text | string | VARCHAR |
| date | timestamp | TIMESTAMP |
| ip | ip | VARCHAR |
| binary | binary | VARBINARY |
| object | struct | STRUCT |
| nested | array | STRUCT |

Note: not every PPL type has a corresponding OpenSearch type, e.g. date and time. To use a function that requires such a data type, the user should convert the data type explicitly.

## Numeric Data Types

Numeric values in the range -2147483648 to +2147483647 are recognized as integers with type name `int`. Values outside that range are parsed as `bigint`.

## Date and Time Data Types

The date and time data types represent temporal values, and the PPL plugin supports the types DATE, TIME, TIMESTAMP and INTERVAL. By default, the OpenSearch DSL uses the date type as the only date- and time-related type, which contains all the information about an absolute time point. To integrate with the PPL language, each of the types other than timestamp holds part of the temporal or timezone information, and the usage of the explicit date and time types is reflected in the datetime functions (see [Functions](../functions/datetime.md) for details), where some functions might have restrictions on the input argument type.

### Date

Date represents the calendar date regardless of the time zone. A given date value represents a 24-hour period, that is, a day, but this period varies across timezones and might have flexible hours during daylight saving time. The date type does not contain time information. The supported range is '1000-01-01' to '9999-12-31'.

| Type | Syntax | Range |
| --- | --- | --- |
| Date | 'yyyy-MM-dd' | '0001-01-01' to '9999-12-31' |

### Time

Time represents the time on a clock or watch with no regard for which timezone it might be related to. Time type data does not have date information.

| Type | Syntax | Range |
| --- | --- | --- |
| Time | 'hh:mm:ss[.fraction]' | '00:00:00.000000' to '23:59:59.999999' |

### Timestamp

A timestamp instance is an absolute instant independent of timezone or convention. For example, rendering the same time point in another timezone changes the displayed value accordingly. The storage of the timestamp type also differs from the other types: a timestamp is converted from the current timezone to UTC for storage, and is converted back from UTC to the set timezone when retrieved.

| Type | Syntax | Range |
| --- | --- | --- |
| Timestamp | 'yyyy-MM-dd hh:mm:ss[.fraction]' | '0001-01-01 00:00:01.000000' UTC to '9999-12-31 23:59:59.999999' |
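To see how these literals look in a query, here is a minimal sketch. It is illustrative only: it reuses the `people` index from the struct examples below and assumes the `DATE`, `TIME` and `TIMESTAMP` functions documented on the datetime functions page.

```ppl
source=people
| eval `d` = DATE('2020-08-17'), `t` = TIME('14:09:00'), `ts` = TIMESTAMP('2020-08-17 14:09:00')
| fields `d`, `t`, `ts`
```

Each literal follows the syntax listed in the tables above, and `typeof()` (see the system functions page) can be used to confirm the resulting type of each column.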
### Interval

Interval data type represents a temporal duration or a period. The syntax is as follows:

| Type | Syntax |
| --- | --- |
| Interval | INTERVAL expr unit |

The expr is any expression that ultimately evaluates to a quantity value, see [Expressions](../functions/expressions.md) for details. The unit represents the unit for interpreting the quantity, including MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER and YEAR. The INTERVAL keyword and the unit specifier are not case sensitive. Note that there are two classes of intervals: year-week intervals can store years, quarters, months and weeks, while day-time intervals can store days, hours, minutes, seconds and microseconds. An interval is comparable only with another interval of the same class.

### Conversion between date and time types

The date and time types, except interval, can be converted to each other, though the conversion may alter the value or lose some information, for example when extracting the time value from a timestamp or converting a date value to a timestamp. The following summarizes the conversion rules that the PPL plugin supports for each type:

#### Conversion from DATE

- Since the date value does not have any time information, conversion to the [Time](#time) type is not useful and will always return a zero time value '00:00:00'.
- Conversion to timestamp attaches the zero time value '00:00:00' and the session timezone (UTC by default) to the date. For example, converting date '2020-08-17' to timestamp type with session timezone UTC yields timestamp '2020-08-17 00:00:00' UTC.

#### Conversion from TIME

- A time value cannot be converted to any other date and time type, since it does not contain any date information to supply to a date or timestamp instance.

#### Conversion from TIMESTAMP

- Conversion from timestamp is more straightforward: converting to date extracts the date value, and converting to time extracts the time value. For example, timestamp '2020-08-17 14:09:00' UTC converts to date '2020-08-17' and to time '14:09:00'.

## String Data Types

A string is a sequence of characters enclosed in either single or double quotes. For example, both 'text' and "text" will be treated as a string literal.

## Query Struct Data Types

In PPL, the struct data type corresponds to the [Object field type in OpenSearch](https://opensearch.org/docs/latest/field-types/supported-field-types/object/). The "." is used as the path selector when accessing an inner attribute of the struct data.

### Example: People

There are three fields in test index `people`: 1) deep nested object field `city`; 2) object field of array value `account`; 3) nested field `projects`:

```json
{
  "mappings": {
    "properties": {
      "city": {
        "properties": {
          "name": {
            "type": "keyword"
          },
          "location": {
            "properties": {
              "latitude": {
                "type": "double"
              }
            }
          }
        }
      },
      "account": {
        "properties": {
          "id": {
            "type": "keyword"
          }
        }
      },
      "projects": {
        "type": "nested",
        "properties": {
          "name": {
            "type": "keyword"
          }
        }
      }
    }
  }
}
```

### Example: Employees

Here is the mapping for test index `employees_nested`.
Note that field `projects` is a nested field:

```json
{
  "mappings": {
    "properties": {
      "id": {
        "type": "long"
      },
      "name": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "projects": {
        "type": "nested",
        "properties": {
          "name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            },
            "fielddata": true
          },
          "started_year": {
            "type": "long"
          }
        }
      },
      "title": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      }
    }
  }
}
```

Result set:

```json
{
  "employees_nested" : [
    {
      "id" : 3,
      "name" : "Bob Smith",
      "title" : null,
      "projects" : [
        {
          "name" : "AWS Redshift Spectrum querying",
          "started_year" : 1990
        },
        {
          "name" : "AWS Redshift security",
          "started_year" : 1999
        },
        {
          "name" : "AWS Aurora security",
          "started_year" : 2015
        }
      ]
    },
    {
      "id" : 4,
      "name" : "Susan Smith",
      "title" : "Dev Mgr",
      "projects" : [ ]
    },
    {
      "id" : 6,
      "name" : "Jane Smith",
      "title" : "Software Eng 2",
      "projects" : [
        {
          "name" : "AWS Redshift security",
          "started_year" : 1998
        },
        {
          "name" : "AWS Hello security",
          "started_year" : 2015,
          "address" : [
            {
              "city" : "Dallas",
              "state" : "TX"
            }
          ]
        }
      ]
    }
  ]
}
```

### Example 1: Select struct inner attribute

This example fetches struct-type data from the people results: city (top level), city.name (second level), and city.location.latitude (deeper level).

```ppl
source=people
| fields city, city.name, city.location.latitude
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------------------------------------+-----------+------------------------+
| city | city.name | city.location.latitude |
|-----------------------------------------------------+-----------+------------------------|
| {'name': 'Seattle', 'location': {'latitude': 10.5}} | Seattle | 10.5 |
+-----------------------------------------------------+-----------+------------------------+
```

### Example 2: Group by struct inner attribute

This example groups by an object field's inner attribute.

```ppl
source=people
| stats count() by city.name
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+-----------+
| count() | city.name |
|---------+-----------|
| 1 | Seattle |
+---------+-----------+
```

### Example 3: Selecting Field of Array Value

Selecting a deeper level of an object field with an array value returns the first element in the array. For example, because the inner field `accounts.id` holds multiple values rather than a single tuple in this document, the first entry is returned.

```ppl
source = people
| fields accounts, accounts.id
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------------------+-------------+
| accounts | accounts.id |
|-----------------------+-------------|
| [{'id': 1},{'id': 2}] | 1 |
+-----------------------+-------------+
```
 
\ No newline at end of file
diff --git a/docs/user/ppl/general/datatypes.rst b/docs/user/ppl/general/datatypes.rst
deleted file mode 100644
index 1f73ca3531a..00000000000
--- a/docs/user/ppl/general/datatypes.rst
+++ /dev/null
@@ -1,392 +0,0 @@
-
-==========
-Data Types
-==========
-
-.. rubric:: Table of contents
-
-.. contents::
- :local:
- :depth: 2
-
-
-Overview
-========
-
-PPL Data Types
--------------------
-
-The PPL support the following data types.
- -+---------------+ -| PPL Data Type | -+===============+ -| boolean | -+---------------+ -| tinyint | -+---------------+ -| smallint | -+---------------+ -| int | -+---------------+ -| bigint | -+---------------+ -| float | -+---------------+ -| double | -+---------------+ -| string | -+---------------+ -| timestamp | -+---------------+ -| date | -+---------------+ -| time | -+---------------+ -| interval | -+---------------+ -| ip | -+---------------+ -| geo_point | -+---------------+ -| binary | -+---------------+ -| struct | -+---------------+ -| array | -+---------------+ - -Data Types Mapping ------------------- - -The table below list the mapping between OpenSearch Data Type, PPL Data Type and SQL Type. - -+-----------------+---------------+-----------+ -| OpenSearch Type | PPL Type | SQL Type | -+=================+===============+===========+ -| boolean | boolean | BOOLEAN | -+-----------------+---------------+-----------+ -| byte | tinyint | TINYINT | -+-----------------+---------------+-----------+ -| short | smallint | SMALLINT | -+-----------------+---------------+-----------+ -| integer | int | INTEGER | -+-----------------+---------------+-----------+ -| long | bigint | BIGINT | -+-----------------+---------------+-----------+ -| float | float | REAL | -+-----------------+---------------+-----------+ -| half_float | float | FLOAT | -+-----------------+---------------+-----------+ -| scaled_float | float | DOUBLE | -+-----------------+---------------+-----------+ -| double | double | DOUBLE | -+-----------------+---------------+-----------+ -| keyword | string | VARCHAR | -+-----------------+---------------+-----------+ -| text | string | VARCHAR | -+-----------------+---------------+-----------+ -| match_only_text | string | VARCHAR | -+-----------------+---------------+-----------+ -| date | timestamp | TIMESTAMP | -+-----------------+---------------+-----------+ -| ip | ip | VARCHAR | -+-----------------+---------------+-----------+ -| binary | binary | VARBINARY | -+-----------------+---------------+-----------+ -| object | struct | STRUCT | -+-----------------+---------------+-----------+ -| nested | array | STRUCT | -+-----------------+---------------+-----------+ - -Notes: Not all the PPL Type has correspond OpenSearch Type. e.g. data and time. To use function which required such data type, user should explicit convert the data type. - - - -Numeric Data Types -================== - -Numeric values ranged from -2147483648 to +2147483647 are recognized as integer with type name ``int``. For others outside the range, ``bigint`` integer will be the data type after parsed. - - -Date and Time Data Types -======================== - -The date and time data types are the types that represent temporal values and PPL plugin supports types including DATE, TIME, TIMESTAMP and INTERVAL. By default, the OpenSearch DSL uses date type as the only date and time related type, which has contained all information about an absolute time point. To integrate with PPL language, each of the types other than timestamp is holding part of temporal or timezone information, and the usage to explicitly clarify the date and time types is reflected in the datetime functions (see `Functions `_ for details), where some functions might have restrictions in the input argument type. - - -Date ----- - -Date represents the calendar date regardless of the time zone. 
A given date value represents a 24-hour period, or say a day, but this period varies in different timezones and might have flexible hours during Daylight Savings Time programs. Besides, the date type does not contain time information as well. The supported range is '1000-01-01' to '9999-12-31'. - -+------+--------------+------------------------------+ -| Type | Syntax | Range | -+======+==============+==============================+ -| Date | 'yyyy-MM-dd' | '0001-01-01' to '9999-12-31' | -+------+--------------+------------------------------+ - - -Time ----- - -Time represents the time on the clock or watch with no regard for which timezone it might be related with. Time type data does not have date information. - -+------+-----------------------+----------------------------------------+ -| Type | Syntax | Range | -+======+=======================+========================================+ -| Time | 'hh:mm:ss[.fraction]' | '00:00:00.000000' to '23:59:59.999999' | -+------+-----------------------+----------------------------------------+ - - -Timestamp ---------- - -A timestamp instance is an absolute instant independent of timezone or convention. For example, for a given point of time, if we set the timestamp of this time point into another timezone, the value should also be different accordingly. Besides, the storage of timestamp type is also different from the other types. The timestamp is converted from the current timezone to UTC for storage, and is converted back to the set timezone from UTC when retrieving. - -+-----------+----------------------------------+------------------------------------------------------------------+ -| Type | Syntax | Range | -+===========+==================================+==================================================================+ -| Timestamp | 'yyyy-MM-dd hh:mm:ss[.fraction]' | '0001-01-01 00:00:01.000000' UTC to '9999-12-31 23:59:59.999999' | -+-----------+----------------------------------+------------------------------------------------------------------+ - - -Interval --------- - -Interval data type represents a temporal duration or a period. The syntax is as follows: - -+----------+--------------------+ -| Type | Syntax | -+==========+====================+ -| Interval | INTERVAL expr unit | -+----------+--------------------+ - -The expr is any expression that can be iterated to a quantity value eventually, see `Expressions `_ for details. The unit represents the unit for interpreting the quantity, including MICROSECOND, SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER and YEAR.The INTERVAL keyword and the unit specifier are not case sensitive. Note that there are two classes of intervals. Year-week intervals can store years, quarters, months and weeks. Day-time intervals can store days, hours, minutes, seconds and microseconds. Year-week intervals are comparable only with another year-week intervals. These two types of intervals can only comparable with the same type of themselves. - - -Conversion between date and time types --------------------------------------- - -Basically the date and time types except interval can be converted to each other, but might suffer some alteration of the value or some information loss, for example extracting the time value from a timestamp value, or convert a date value to a timestamp value and so forth. 
Here lists the summary of the conversion rules that PPL plugin supports for each of the types: - -Conversion from DATE ->>>>>>>>>>>>>>>>>>>> - -- Since the date value does not have any time information, conversion to `Time`_ type is not useful, and will always return a zero time value '00:00:00'. - -- Conversion to timestamp is to alternate both the time value and the timezone information, and it attaches the zero time value '00:00:00' and the session timezone (UTC by default) to the date. For example, the result to covert date '2020-08-17' to timestamp type with session timezone UTC is timestamp '2020-08-17 00:00:00' UTC. - - -Conversion from TIME ->>>>>>>>>>>>>>>>>>>> - -- Time value cannot be converted to any other date and time types since it does not contain any date information, so it is not meaningful to give no date info to a date/timestamp instance. - - -Conversion from TIMESTAMP ->>>>>>>>>>>>>>>>>>>>>>>>> - -- Conversion from timestamp is much more straightforward. To convert it to date is to extract the date value, and conversion to time is to extract the time value. For example, the result to convert timestamp '2020-08-17 14:09:00' UTC to date is date '2020-08-17', to time is '14:09:00'. - - -String Data Types -================= - -A string is a sequence of characters enclosed in either single or double quotes. For example, both 'text' and "text" will be treated as string literal. - - -Query Struct Data Types -======================= - -In PPL, the Struct Data Types corresponding to the `Object field type in OpenSearch `_. The "." is used as the path selector when access the inner attribute of the struct data. - -Example: People ---------------- - -There are three fields in test index ``people``: 1) deep nested object field ``city``; 2) object field of array value ``account``; 3) nested field ``projects``:: - - { - "mappings": { - "properties": { - "city": { - "properties": { - "name": { - "type": "keyword" - }, - "location": { - "properties": { - "latitude": { - "type": "double" - } - } - } - } - }, - "account": { - "properties": { - "id": { - "type": "keyword" - } - } - }, - "projects": { - "type": "nested", - "properties": { - "name": { - "type": "keyword" - } - } - } - } - } - } - -Example: Employees ------------------- - -Here is the mapping for test index ``employees_nested``. 
Note that field ``projects`` is a nested field:: - - { - "mappings": { - "properties": { - "id": { - "type": "long" - }, - "name": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - }, - "projects": { - "type": "nested", - "properties": { - "name": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - }, - "fielddata": true - }, - "started_year": { - "type": "long" - } - } - }, - "title": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - } - } - } - } - - -Result set:: - - { - "employees_nested" : [ - { - "id" : 3, - "name" : "Bob Smith", - "title" : null, - "projects" : [ - { - "name" : "AWS Redshift Spectrum querying", - "started_year" : 1990 - }, - { - "name" : "AWS Redshift security", - "started_year" : 1999 - }, - { - "name" : "AWS Aurora security", - "started_year" : 2015 - } - ] - }, - { - "id" : 4, - "name" : "Susan Smith", - "title" : "Dev Mgr", - "projects" : [ ] - }, - { - "id" : 6, - "name" : "Jane Smith", - "title" : "Software Eng 2", - "projects" : [ - { - "name" : "AWS Redshift security", - "started_year" : 1998 - }, - { - "name" : "AWS Hello security", - "started_year" : 2015, - "address" : [ - { - "city" : "Dallas", - "state" : "TX" - } - ] - } - ] - } - ] - } - - -Example 1: Select struct inner attribute ----------------------------------------- - -The example show fetch city (top level), city.name (second level), city.location.latitude (deeper level) struct type data from people results. - -PPL query:: - - os> source=people | fields city, city.name, city.location.latitude; - fetched rows / total rows = 1/1 - +-----------------------------------------------------+-----------+------------------------+ - | city | city.name | city.location.latitude | - |-----------------------------------------------------+-----------+------------------------| - | {'name': 'Seattle', 'location': {'latitude': 10.5}} | Seattle | 10.5 | - +-----------------------------------------------------+-----------+------------------------+ - - -Example 2: Group by struct inner attribute ------------------------------------------- - -The example show group by object field inner attribute. - -PPL query:: - - os> source=people | stats count() by city.name; - fetched rows / total rows = 1/1 - +---------+-----------+ - | count() | city.name | - |---------+-----------| - | 1 | Seattle | - +---------+-----------+ - -Example 3: Selecting Field of Array Value ------------------------------------------ - -Select deeper level for object fields of array value which returns the first element in the array. For example, because inner field ``accounts.id`` has three values instead of a tuple in this document, the first entry is returned.:: - - os> source = people | fields accounts, accounts.id; - fetched rows / total rows = 1/1 - +-----------------------+-------------+ - | accounts | accounts.id | - |-----------------------+-------------| - | [{'id': 1},{'id': 2}] | 1 | - +-----------------------+-------------+ diff --git a/docs/user/ppl/general/identifiers.md b/docs/user/ppl/general/identifiers.md new file mode 100644 index 00000000000..c532e9929f3 --- /dev/null +++ b/docs/user/ppl/general/identifiers.md @@ -0,0 +1,188 @@ +# Identifiers + +## Introduction + +Identifiers are used for naming your database objects, such as index name, field name, alias etc. Basically there are two types of identifiers: regular identifiers and delimited identifiers. 
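The two forms are easiest to see side by side. The sketch below is illustrative only (the `logs-2020.01.11` index and `@timestamp` field are hypothetical), and each form is defined in the sections that follow.

```ppl
source=logs-2020.01.11
| fields @timestamp, `user.name`
```

Here `logs-2020.01.11` and `@timestamp` are regular identifiers under the extended OpenSearch rules described below, while `user.name` must be delimited with backticks because of the dot.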
## Regular Identifiers

### Description

A regular identifier is a string of characters that must start with an ASCII letter (lower or upper case). Each subsequent character can be a letter, digit, or underscore (`_`). It cannot be a reserved keyword, and whitespace and other special characters are not allowed.
For OpenSearch, the following identifiers are supported as extensions:
1. Identifiers prefixed by dot `.`: this is called a hidden index in OpenSearch, for example `.opensearch_dashboards`.
2. Identifiers prefixed by at sign `@`: this is common for meta fields generated in Logstash ingestion.
3. Identifiers with `-` in the middle: this is mostly the case for index names with date information.
4. Identifiers with star `*` present: this is mostly an index pattern for wildcard match.

Index names with a date suffix separated by dashes or dots, such as `cwl-2020.01.11` or `logs-7.0-2020.01.11`, are common for indices created by Logstash or FileBeat ingestion. For user convenience, this kind of identifier is also supported as an index name without quoting. In this case, a wildcard within the date pattern is also allowed, so you can search across indices of different date ranges. For example, you can use `logs-2020.1*` to search in indices for October, November and December 2020.
### Examples

Here are examples of using an index pattern directly without quotes

```ppl
source=accounts
| fields account_number, firstname, lastname
```

Expected output:

```text
fetched rows / total rows = 4/4
+----------------+-----------+----------+
| account_number | firstname | lastname |
|----------------+-----------+----------|
| 1              | Amber     | Duke     |
| 6              | Hattie    | Bond     |
| 13             | Nanette   | Bates    |
| 18             | Dale      | Adams    |
+----------------+-----------+----------+
```

## Delimited Identifiers

### Description

A delimited identifier is an identifier enclosed in backticks `` ` ``. In this case, the enclosed identifier is not necessarily a regular identifier. In other words, it can contain any special character not allowed by a regular identifier.
### Use Cases

Here are typical use cases for delimited identifiers:
1. Identifiers named after a reserved keyword
2. Identifiers with dot `.` present: similar to `-` in index names carrying date information, such an identifier must be quoted so the parser can differentiate it from an identifier with qualifiers.
3. Identifiers with other special characters: OpenSearch has its own rules that allow more special characters; for example, Unicode characters are supported in index names.

### Examples

Here are examples of quoting an index name with backticks

```ppl
source=`accounts`
| fields `account_number`
```

Expected output:

```text
fetched rows / total rows = 4/4
+----------------+
| account_number |
|----------------|
| 1              |
| 6              |
| 13             |
| 18             |
+----------------+
```

## Cross-Cluster Index Identifiers

### Description

A cross-cluster index identifier is an index identifier prefixed with a cluster identifier and a colon (`:`). The cluster identifier can contain star `*`, which is mostly a cluster pattern for wildcard match.
### Use Cases

It is used to identify an index on a remote cluster for cross-cluster search.
### Examples

For example, if you set up a connection between the local cluster and a remote cluster `my_cluster`, then you can run `source=my_cluster:accounts` to query the `accounts` index at `my_cluster`.
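As a minimal sketch (assuming the `my_cluster` connection above is configured), that inline example can be written as a full query; since the cluster identifier may contain `*`, a pattern such as `source=*:accounts` would address the `accounts` index on every matching remote cluster:

```ppl
source=my_cluster:accounts
| fields account_number
```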
## Case Sensitivity

### Description

Identifiers are treated in a case-sensitive manner, so an identifier must be exactly the same as what is stored in OpenSearch.
### Examples

For example, if you run `source=Accounts`, it will end up with an index not found exception from our plugin, because the actual index name is lowercase.
## Multiple Indices

### Description

To query multiple indices, you could
1. Include `*` in the index name; this is an index pattern for wildcard match.
2. List multiple indices separated by `,`.
3. Delimit multiple comma-separated indices in backticks. Note: no space is allowed between the indices.

### Examples

Query wildcard indices

```ppl
source=acc*
| stats count()
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+
| count() |
|---------|
| 5       |
+---------+
```

Query multiple indices separated by `,`

```ppl
source=accounts, account2
| stats count()
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+
| count() |
|---------|
| 5       |
+---------+
```

Query delimited multiple indices separated by `,`

```ppl
source=`accounts,account2`
| stats count()
```

Expected output:

```text
fetched rows / total rows = 1/1
+---------+
| count() |
|---------|
| 5       |
+---------+
```

## Metadata Identifiers

### Description

One can also provide meta-field name(s) to retrieve reserved fields (beginning with an underscore) from OpenSearch documents. Meta-fields are not part of the default field list (`search source=`) and must be explicitly requested to be returned.
### Examples

Query metadata fields:

```ppl
source=accounts
| fields firstname, lastname, _index, _sort
```

Expected output:

```text
fetched rows / total rows = 4/4
+-----------+----------+----------+-------+
| firstname | lastname | _index   | _sort |
|-----------+----------+----------+-------|
| Amber     | Duke     | accounts | -2    |
| Hattie    | Bond     | accounts | -2    |
| Nanette   | Bates    | accounts | -2    |
| Dale      | Adams    | accounts | -2    |
+-----------+----------+----------+-------+
```
\ No newline at end of file
diff --git a/docs/user/ppl/general/identifiers.rst b/docs/user/ppl/general/identifiers.rst
deleted file mode 100644
index af4e81514c8..00000000000
--- a/docs/user/ppl/general/identifiers.rst
+++ /dev/null
@@ -1,188 +0,0 @@
-===========
-Identifiers
-===========
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-
-Introduction
-============
-
-Identifiers are used for naming your database objects, such as index name, field name, alias etc. Basically there are two types of identifiers: regular identifiers and delimited identifiers.
-
-
-Regular Identifiers
-===================
-
-Description
------------
-
-A regular identifier is a string of characters that must start with ASCII letter (lower or upper case). The subsequent character can be a combination of letter, digit, underscore (``_``). It cannot be a reversed key word. And whitespace and other special characters are not allowed.
-
-For OpenSearch, the following identifiers are supported extensionally:
-
-1. Identifiers prefixed by dot ``.``: this is called hidden index in OpenSearch, for example ``.opensearch_dashboards``.
-2. Identifiers prefixed by at sign ``@``: this is common for meta fields generated in Logstash ingestion.
-3. Identifiers with ``-`` in the middle: this is mostly the case for index name with date information.
-4. Identifiers with star ``*`` present: this is mostly an index pattern for wildcard match.
- -Index name with date suffix separated by dash or dots, such as ``cwl-2020.01.11`` or ``logs-7.0-2020.01.11``, is common for those created by Logstash or FileBeat ingestion. So, this kind of identifier used as index name is also supported without the need of being quoted for user convenience. In this case, wildcard within date pattern is also allowed to search for data across indices of different date range. For example, you can use ``logs-2020.1*`` to search in indices for October, November and December 2020. - -Examples --------- - -Here are examples for using index pattern directly without quotes:: - - os> source=accounts | fields account_number, firstname, lastname; - fetched rows / total rows = 4/4 - +----------------+-----------+----------+ - | account_number | firstname | lastname | - |----------------+-----------+----------| - | 1 | Amber | Duke | - | 6 | Hattie | Bond | - | 13 | Nanette | Bates | - | 18 | Dale | Adams | - +----------------+-----------+----------+ - - -Delimited Identifiers -===================== - -Description ------------ - -A delimited identifier is an identifier enclosed in back ticks `````. In this case, the identifier enclosed is not necessarily a regular identifier. In other words, it can contain any special character not allowed by regular identifier. - -Use Cases ---------- - -Here are typical examples of the use of delimited identifiers: - -1. Identifiers of reserved key word name -2. Identifiers with dot ``.`` present: similarly as ``-`` in index name to include date information, it is required to be quoted so parser can differentiate it from identifier with qualifiers. -3. Identifiers with other special character: OpenSearch has its own rule which allows more special character, for example Unicode character is supported in index name. - -Examples --------- - -Here are examples for quoting an index name by back ticks:: - - os> source=`accounts` | fields `account_number`; - fetched rows / total rows = 4/4 - +----------------+ - | account_number | - |----------------| - | 1 | - | 6 | - | 13 | - | 18 | - +----------------+ - - -Cross-Cluster Index Identifiers -=================== - -Description ------------ - -A cross-cluster index identifier is an index identifier with a prefix ``:``. The cluster identifier could contain star ``*``. This is mostly an cluster pattern for wildcard match. - -Use Cases ---------- - -It is used to identify an index on a remote cluster for cross-cluster search. - -Examples --------- - -For example, if you setup a connection between the local cluster and a remote cluster ``my_cluster``, then you can run ``source=my_cluster:accounts`` to query the ``accounts`` index at ``my_cluster``. - - -Case Sensitivity -================ - -Description ------------ - -Identifiers are treated in case sensitive manner. So it must be exactly same as what is stored in OpenSearch. - -Examples --------- - -For example, if you run ``source=Accounts``, it will end up with an index not found exception from our plugin because the actual index name is under lower case. - -Multiple Indices -================ - -Description ------------ - -To query multiple indices, you could - -1. Include ``*`` in index name, this is an index pattern for wildcard match. -2. Include multiple indices and seperated them by ``,``. -3. Delimited multiple indices and seperated them by ``,``. Note: no space allowed between each index. 
-
-Examples
---------
-
-Query wildcard indices::
-
-    os> source=acc* | stats count();
-    fetched rows / total rows = 1/1
-    +---------+
-    | count() |
-    |---------|
-    | 5       |
-    +---------+
-
-Query multiple indices seperated by ``,``::
-
-    os> source=accounts, account2 | stats count();
-    fetched rows / total rows = 1/1
-    +---------+
-    | count() |
-    |---------|
-    | 5       |
-    +---------+
-
-Query delimited multiple indices seperated by ``,``::
-
-    os> source=`accounts,account2` | stats count();
-    fetched rows / total rows = 1/1
-    +---------+
-    | count() |
-    |---------|
-    | 5       |
-    +---------+
-
-Metadata Identifiers
-====================
-
-Description
------------
-
-One can also provide meta-field name(s) to retrieve reserved-fields (beginning with underscore) from OpenSearch documents. Meta-fields are not output
-as default field list (`search source=`) and must be explicitly included to be returned.
-
-Examples
---------
-
-Query metadata fields::
-
-    os> source=accounts | fields firstname, lastname, _index, _sort;
-    fetched rows / total rows = 4/4
-    +-----------+----------+----------+-------+
-    | firstname | lastname | _index   | _sort |
-    |-----------+----------+----------+-------|
-    | Amber     | Duke     | accounts | -2    |
-    | Hattie    | Bond     | accounts | -2    |
-    | Nanette   | Bates    | accounts | -2    |
-    | Dale      | Adams    | accounts | -2    |
-    +-----------+----------+----------+-------+
-
diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md
new file mode 100644
index 00000000000..a8fcb5a480b
--- /dev/null
+++ b/docs/user/ppl/index.md
@@ -0,0 +1,100 @@
# OpenSearch PPL Reference Manual

## Overview

Piped Processing Language (PPL), powered by OpenSearch, enables OpenSearch users to explore and discover data stored in OpenSearch, and to find search patterns in it, using a set of commands delimited by pipes (\|). These are essentially read-only requests to process data and return results.

Currently, OpenSearch users can query data using either Query DSL or SQL. Query DSL is powerful and fast. However, it has a steep learning curve, and was not designed as a human interface to easily create ad hoc queries and explore user data. SQL allows users to extract and analyze data in OpenSearch in a declarative manner. OpenSearch now makes its search and query engine robust by introducing Piped Processing Language (PPL). It enables users to extract insights from OpenSearch with a sequence of commands delimited by pipes (\|). It supports a comprehensive set of commands including search, where, fields, rename, dedup, sort, eval, head, top and rare, and functions, operators and expressions. Even new users who have recently adopted OpenSearch can be productive from day one if they are familiar with the pipe (\|) syntax. It enables developers, DevOps engineers, support engineers, site reliability engineers (SREs), and IT managers to effectively discover and explore log, monitoring and observability data stored in OpenSearch.

We expand the capabilities of our Workbench, a comprehensive and integrated visual query tool currently supporting only SQL, to run on-demand PPL commands, and view and save results as text and JSON. We also add a new interactive standalone command line tool, the PPL CLI, to run on-demand PPL commands, and view and save results as text and JSON.
A query starts with a search command, followed by a set of commands delimited by pipes (\|).

For example, the following query retrieves firstname and lastname from accounts where age is larger than 18.
+ +``` +source=accounts +| where age > 18 +| fields firstname, lastname +``` + +* **Interfaces** + - [Endpoint](interfaces/endpoint.md) + - [Protocol](interfaces/protocol.md) +* **Administration** + - [Plugin Settings](admin/settings.md) + - [Security Settings](admin/security.md) + - [Monitoring](admin/monitoring.md) + - [Datasource Settings](admin/datasources.md) + - [Prometheus Connector](admin/connectors/prometheus_connector.md) + - [Cross-Cluster Search](admin/cross_cluster_search.md) +* **Language Structure** + - [Identifiers](general/identifiers.md) + - [Data Types](general/datatypes.md) +* **Commands** + + The following commands are available in PPL: + **Note:** Experimental commands are ready for use, but specific parameters may change based on feedback. + +| Command Name | Version Introduced | Current Status | Command Description | +| --- | --- | --- | --- | +| [search command](cmd/search.md) | 1.0 | stable (since 1.0) | Retrieve documents from the index. | +| [where command](cmd/where.md) | 1.0 | stable (since 1.0) | Filter the search result using boolean expressions. | +| [subquery command](cmd/subquery.md) | 3.0 | experimental (since 3.0) | Embed one PPL query inside another for complex filtering and data retrieval operations. | +| [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. | +| [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. | +| [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. | +| [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result | +| [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. | +| [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. | +| [flatten command](cmd/flatten.md) | 3.1 | experimental (since 3.1) | Flatten a struct or an object field into separate fields in a document. | +| [table command](cmd/table.md) | 3.3 | experimental (since 3.3) | Keep or remove fields from the search result using enhanced syntax options. | +| [stats command](cmd/stats.md) | 1.0 | stable (since 1.0) | Calculate aggregation from search results. | +| [eventstats command](cmd/eventstats.md) | 3.1 | experimental (since 3.1) | Calculate aggregation statistics and add them as new fields to each event. | +| [streamstats command](cmd/streamstats.md) | 3.4 | experimental (since 3.4) | Calculate cumulative or rolling statistics as events are processed in order. | +| [bin command](cmd/bin.md) | 3.3 | experimental (since 3.3) | Group numeric values into buckets of equal intervals. | +| [timechart command](cmd/timechart.md) | 3.3 | experimental (since 3.3) | Create time-based charts and visualizations. | +| [chart command](cmd/chart.md) | 3.4 | experimental (since 3.4) | Apply statistical aggregations to search results and group the data for visualizations. | +| [trendline command](cmd/trendline.md) | 3.0 | experimental (since 3.0) | Calculate moving averages of fields. | +| [sort command](cmd/sort.md) | 1.0 | stable (since 1.0) | Sort all the search results by the specified fields. | +| [reverse command](cmd/reverse.md) | 3.2 | experimental (since 3.2) | Reverse the display order of search results. 
| +| [head command](cmd/head.md) | 1.0 | stable (since 1.0) | Return the first N number of specified results after an optional offset in search order. | +| [dedup command](cmd/dedup.md) | 1.0 | stable (since 1.0) | Remove identical documents defined by the field from the search result. | +| [top command](cmd/top.md) | 1.0 | stable (since 1.0) | Find the most common tuple of values of all fields in the field list. | +| [rare command](cmd/rare.md) | 1.0 | stable (since 1.0) | Find the least common tuple of values of all fields in the field list. | +| [parse command](cmd/parse.md) | 1.3 | stable (since 1.3) | Parse a text field with a regular expression and append the result to the search result. | +| [grok command](cmd/grok.md) | 2.4 | stable (since 2.4) | Parse a text field with a grok pattern and append the results to the search result. | +| [rex command](cmd/rex.md) | 3.3 | experimental (since 3.3) | Extract fields from a raw text field using regular expression named capture groups. | +| [regex command](cmd/regex.md) | 3.3 | experimental (since 3.3) | Filter search results by matching field values against a regular expression pattern. | +| [spath command](cmd/spath.md) | 3.3 | experimental (since 3.3) | Extract fields from structured text data. | +| [patterns command](cmd/patterns.md) | 2.4 | stable (since 2.4) | Extract log patterns from a text field and append the results to the search result. | +| [join command](cmd/join.md) | 3.0 | stable (since 3.0) | Combine two datasets together. | +| [append command](cmd/append.md) | 3.3 | experimental (since 3.3) | Append the result of a sub-search to the bottom of the input search results. | +| [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | +| [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | +| [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | +| [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | +| [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | +| [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | +| [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | +| [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | +| [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. 
| + + - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting +* **Functions** + - [Aggregation Functions](functions/aggregations.md) + - [Collection Functions](functions/collection.md) + - [Condition Functions](functions/condition.md) + - [Cryptographic Functions](functions/cryptographic.md) + - [Date and Time Functions](functions/datetime.md) + - [Expressions](functions/expressions.md) + - [IP Address Functions](functions/ip.md) + - [JSON Functions](functions/json.md) + - [Math Functions](functions/math.md) + - [Relevance Functions](functions/relevance.md) + - [String Functions](functions/string.md) + - [System Functions](functions/system.md) + - [Type Conversion Functions](functions/conversion.md) +* **Optimization** + - [Optimization](../../user/optimization/optimization.rst) +* **Limitations** + - [Limitations](limitations/limitations.md) \ No newline at end of file diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst deleted file mode 100644 index 981b2de3169..00000000000 --- a/docs/user/ppl/index.rst +++ /dev/null @@ -1,137 +0,0 @@ - -=============================== -OpenSearch PPL Reference Manual -=============================== - -Overview ---------- -Piped Processing Language (PPL), powered by OpenSearch, enables OpenSearch users with exploration and discovery of, and finding search patterns in data stored in OpenSearch, using a set of commands delimited by pipes (|). These are essentially read-only requests to process data and return results. - -Currently, OpenSearch users can query data using either Query DSL or SQL. Query DSL is powerful and fast. However, it has a steep learning curve, and was not designed as a human interface to easily create ad hoc queries and explore user data. SQL allows users to extract and analyze data in OpenSearch in a declarative manner. OpenSearch now makes its search and query engine robust by introducing Piped Processing Language (PPL). It enables users to extract insights from OpenSearch with a sequence of commands delimited by pipes (|). It supports a comprehensive set of commands including search, where, fields, rename, dedup, sort, eval, head, top and rare, and functions, operators and expressions. Even new users who have recently adopted OpenSearch, can be productive day one, if they are familiar with the pipe (|) syntax. It enables developers, DevOps engineers, support engineers, site reliability engineers (SREs), and IT managers to effectively discover and explore log, monitoring and observability data stored in OpenSearch. - -We expand the capabilities of our Workbench, a comprehensive and integrated visual query tool currently supporting only SQL, to run on-demand PPL commands, and view and save results as text and JSON. We also add a new interactive standalone command line tool, the PPL CLI, to run on-demand PPL commands, and view and save results as text and JSON. - -The query start with search command and then flowing a set of command delimited by pipe (|). -| for example, the following query retrieve firstname and lastname from accounts if age large than 18. - -.. 
code-block:: - - source=accounts - | where age > 18 - | fields firstname, lastname - -* **Interfaces** - - - `Endpoint `_ - - - `Protocol `_ - -* **Administration** - - - `Plugin Settings `_ - - - `Security Settings `_ - - - `Monitoring `_ - - - `Datasource Settings `_ - - - `Prometheus Connector `_ - - - `Cross-Cluster Search `_ - -* **Language Structure** - - - `Identifiers `_ - - - `Data Types `_ - -* **Commands** - - The following commands are available in PPL: - - **Note:** Experimental commands are ready for use, but specific parameters may change based on feedback. - - ============================================================== ================== ======================== ============================================================================================== - Command Name Version Introduced Current Status Command Description - ============================================================== ================== ======================== ============================================================================================== - `search command `_ 1.0 stable (since 1.0) Retrieve documents from the index. - `where command `_ 1.0 stable (since 1.0) Filter the search result using boolean expressions. - `subquery command `_ 3.0 experimental (since 3.0) Embed one PPL query inside another for complex filtering and data retrieval operations. - `fields command `_ 1.0 stable (since 1.0) Keep or remove fields from the search result. - `rename command `_ 1.0 stable (since 1.0) Rename one or more fields in the search result. - `eval command `_ 1.0 stable (since 1.0) Evaluate an expression and append the result to the search result. - `replace command `_ 3.4 experimental (since 3.4) Replace text in one or more fields in the search result - `fillnull command `_ 3.0 experimental (since 3.0) Fill null with provided value in one or more fields in the search result. - `expand command `_ 3.1 experimental (since 3.1) Transform a single document into multiple documents by expanding a nested array field. - `flatten command `_ 3.1 experimental (since 3.1) Flatten a struct or an object field into separate fields in a document. - `table command `_ 3.3 experimental (since 3.3) Keep or remove fields from the search result using enhanced syntax options. - `stats command `_ 1.0 stable (since 1.0) Calculate aggregation from search results. - `eventstats command `_ 3.1 experimental (since 3.1) Calculate aggregation statistics and add them as new fields to each event. - `streamstats command `_ 3.4 experimental (since 3.4) Calculate cumulative or rolling statistics as events are processed in order. - `bin command `_ 3.3 experimental (since 3.3) Group numeric values into buckets of equal intervals. - `timechart command `_ 3.3 experimental (since 3.3) Create time-based charts and visualizations. - `chart command `_ 3.4 experimental (since 3.4) Apply statistical aggregations to search results and group the data for visualizations. - `trendline command `_ 3.0 experimental (since 3.0) Calculate moving averages of fields. - `sort command `_ 1.0 stable (since 1.0) Sort all the search results by the specified fields. - `reverse command `_ 3.2 experimental (since 3.2) Reverse the display order of search results. - `head command `_ 1.0 stable (since 1.0) Return the first N number of specified results after an optional offset in search order. - `dedup command `_ 1.0 stable (since 1.0) Remove identical documents defined by the field from the search result. 
- `top command `_ 1.0 stable (since 1.0) Find the most common tuple of values of all fields in the field list. - `rare command `_ 1.0 stable (since 1.0) Find the least common tuple of values of all fields in the field list. - `parse command `_ 1.3 stable (since 1.3) Parse a text field with a regular expression and append the result to the search result. - `grok command `_ 2.4 stable (since 2.4) Parse a text field with a grok pattern and append the results to the search result. - `rex command `_ 3.3 experimental (since 3.3) Extract fields from a raw text field using regular expression named capture groups. - `regex command `_ 3.3 experimental (since 3.3) Filter search results by matching field values against a regular expression pattern. - `spath command `_ 3.3 experimental (since 3.3) Extract fields from structured text data. - `patterns command `_ 2.4 stable (since 2.4) Extract log patterns from a text field and append the results to the search result. - `join command `_ 3.0 stable (since 3.0) Combine two datasets together. - `append command `_ 3.3 experimental (since 3.3) Append the result of a sub-search to the bottom of the input search results. - `appendcol command `_ 3.1 experimental (since 3.1) Append the result of a sub-search and attach it alongside the input search results. - `lookup command `_ 3.0 experimental (since 3.0) Add or replace data from a lookup index. - `multisearch command `_ 3.4 experimental (since 3.4) Execute multiple search queries and combine their results. - `ml command `_: 2.5 stable (since 2.5) Apply machine learning algorithms to analyze data. - `kmeans command `_ 1.3 stable (since 1.3) Apply the kmeans algorithm on the search result returned by a PPL command. - `ad command `_ 1.3 deprecated (since 2.5) Apply Random Cut Forest algorithm on the search result returned by a PPL command. - `describe command `_ 2.1 stable (since 2.1) Query the metadata of an index. - `explain command `_ 3.1 stable (since 3.1) Explain the plan of query. - `show datasources command `_ 2.4 stable (since 2.4) Query datasources configured in the PPL engine. - ============================================================== ================== ======================== ============================================================================================== - - - `Syntax `_ - PPL query structure and command syntax formatting - -* **Functions** - - - `Aggregation Functions `_ - - - `Collection Functions `_ - - - `Condition Functions `_ - - - `Cryptographic Functions `_ - - - `Date and Time Functions `_ - - - `Expressions `_ - - - `IP Address Functions `_ - - - `JSON Functions `_ - - - `Math Functions `_ - - - `Relevance Functions `_ - - - `String Functions `_ - - - `System Functions `_ - - - `Type Conversion Functions `_ - -* **Optimization** - - - `Optimization <../../user/optimization/optimization.rst>`_ - -* **Limitations** - - - `Limitations `_ diff --git a/docs/user/ppl/interfaces/endpoint.md b/docs/user/ppl/interfaces/endpoint.md new file mode 100644 index 00000000000..e1e9cf705bf --- /dev/null +++ b/docs/user/ppl/interfaces/endpoint.md @@ -0,0 +1,154 @@ +# Endpoint + +## Introduction + +To send query request to PPL plugin, you MUST use HTTP POST request. POST request doesn't have length limitation and allows for other parameters passed to plugin for other functionality such as prepared statement. And also the explain endpoint is used very often for query translation and troubleshooting. 
+## POST + +### Description + +You can send HTTP POST request to endpoint **/_plugins/_ppl** with your query in request body. +### Example + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{"query" : "source=accounts | fields firstname, lastname"}' +``` + +Expected output: + +```json +{ + "schema": [ + { + "name": "firstname", + "type": "string" + }, + { + "name": "lastname", + "type": "string" + } + ], + "datarows": [ + [ + "Amber", + "Duke" + ], + [ + "Hattie", + "Bond" + ], + [ + "Nanette", + "Bates" + ], + [ + "Dale", + "Adams" + ] + ], + "total": 4, + "size": 4 +} +``` + +## Explain + +### Description + +You can send HTTP explain request to endpoint **/_plugins/_ppl/_explain** with your query in request body to understand the execution plan for the PPL query. The explain endpoint is useful when user want to get insight how the query is executed in the engine. +### Description + +To translate your query, send it to explain endpoint. The explain output is OpenSearch domain specific language (DSL) in JSON format. You can just copy and paste it to your console to run it against OpenSearch directly. +Explain output could be set different formats: `standard` (the default format), `simple`, `extended`, `dsl`. +### Example 1 default (standard) format + +Explain query + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl/_explain \ +-d '{"query" : "source=state_country | where age>30"}' +``` + +Expected output: + +```json +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" + } +} + +``` + +### Example 2 simple format + +Explain query + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl/_explain?format=simple \ +-d '{"query" : "source=state_country | where age>30"}' +``` + +Expected output: + +```json +{ + "calcite": { + "logical": "LogicalSystemLimit\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n" + } +} +``` + +### Example 3 extended format + +Explain query + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl/_explain?format=extended \ +-d '{"query" : "source=state_country | where age>30"}' +``` + +Expected output: + +```json +{ + "calcite": { + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", + "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n", + "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n return v1stashed.scan();\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n" + } +} +``` + +### Example 4 YAML format (experimental) + + YAML explain output is an experimental feature and not intended for + production use. The interface and output may change without notice. +Return Explain response format in In `yaml` format. +Explain query + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl/_explain?format=yaml \ +-d '{"query" : "source=state_country | where age>30"}' +``` + +Expected output: + +```yaml +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5]) + LogicalFilter(condition=[>($5, 30)]) + CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["name","country","state","month","year","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) +``` + \ No newline at end of file diff --git a/docs/user/ppl/interfaces/endpoint.rst b/docs/user/ppl/interfaces/endpoint.rst deleted file mode 100644 index 08032ad6cda..00000000000 --- a/docs/user/ppl/interfaces/endpoint.rst +++ /dev/null @@ -1,150 +0,0 @@ -.. highlight:: sh - -======== -Endpoint -======== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -Introduction -============ - -To send query request to PPL plugin, you MUST use HTTP POST request. POST request doesn't have length limitation and allows for other parameters passed to plugin for other functionality such as prepared statement. And also the explain endpoint is used very often for query translation and troubleshooting. - -POST -==== - -Description ------------ - -You can send HTTP POST request to endpoint **/_plugins/_ppl** with your query in request body. - -Example -------- - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl \ - ... 
-d '{"query" : "source=accounts | fields firstname, lastname"}' - { - "schema": [ - { - "name": "firstname", - "type": "string" - }, - { - "name": "lastname", - "type": "string" - } - ], - "datarows": [ - [ - "Amber", - "Duke" - ], - [ - "Hattie", - "Bond" - ], - [ - "Nanette", - "Bates" - ], - [ - "Dale", - "Adams" - ] - ], - "total": 4, - "size": 4 - } - -Explain -======= - -Description ------------ - -You can send HTTP explain request to endpoint **/_plugins/_ppl/_explain** with your query in request body to understand the execution plan for the PPL query. The explain endpoint is useful when user want to get insight how the query is executed in the engine. - -Description ------------ - -To translate your query, send it to explain endpoint. The explain output is OpenSearch domain specific language (DSL) in JSON format. You can just copy and paste it to your console to run it against OpenSearch directly. - -Explain output could be set different formats: ``standard`` (the default format), ``simple``, ``extended``, ``dsl``. - - -Example 1 default (standard) format ------------------------------------ - -Explain query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl/_explain \ - ... -d '{"query" : "source=state_country | where age>30"}' - { - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n" - } - } - -Example 2 simple format ------------------------ - -Explain query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl/_explain?format=simple \ - ... -d '{"query" : "source=state_country | where age>30"}' - { - "calcite": { - "logical": "LogicalSystemLimit\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n" - } - } - -Example 3 extended format -------------------------- - -Explain query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl/_explain?format=extended \ - ... 
-d '{"query" : "source=state_country | where age>30"}' - { - "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n", - "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"name\",\"country\",\"state\",\"month\",\"year\",\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n", - "extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n return v1stashed.scan();\n}\n\n\npublic Class getElementType() {\n return java.lang.Object[].class;\n}\n\n\n" - } - } - -Example 4 YAML format (experimental) ------------------------------------ - -.. note:: - YAML explain output is an experimental feature and not intended for - production use. The interface and output may change without notice. - -Return Explain response format in In ``yaml`` format. - -Explain query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl/_explain?format=yaml \ - ... -d '{"query" : "source=state_country | where age>30"}' - calcite: - logical: | - LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(name=[$0], country=[$1], state=[$2], month=[$3], year=[$4], age=[$5]) - LogicalFilter(condition=[>($5, 30)]) - CalciteLogicalIndexScan(table=[[OpenSearch, state_country]]) - physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[name, country, state, month, year, age], FILTER->>($5, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["name","country","state","month","year","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/docs/user/ppl/interfaces/protocol.md b/docs/user/ppl/interfaces/protocol.md new file mode 100644 index 00000000000..680f01fd379 --- /dev/null +++ b/docs/user/ppl/interfaces/protocol.md @@ -0,0 +1,130 @@ +# Protocol + +## Introduction + +For the protocol, PPL endpoint provides response formats in the JDBC format. JDBC format is widely used because it provides schema information and more functionality such as pagination. Besides JDBC driver, various clients can benefit from the detailed and well formatted response. +## Request/Response Format + +### Description + +The body of HTTP POST request can take PPL query. 
+### Example 1 + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{"query" : "source=accounts | fields firstname, lastname"}' +``` + +Expected output: + +```json +{ + "schema": [ + { + "name": "firstname", + "type": "string" + }, + { + "name": "lastname", + "type": "string" + } + ], + "datarows": [ + [ + "Amber", + "Duke" + ], + [ + "Hattie", + "Bond" + ], + [ + "Nanette", + "Bates" + ], + [ + "Dale", + "Adams" + ] + ], + "total": 4, + "size": 4 +} +``` + +## JDBC Format + +### Description + +By default the plugin return JDBC format. JDBC format is provided for JDBC driver and client side that needs both schema and result set well formatted. +### Example 1 + +Here is an example for normal response. The `schema` includes field name and its type and `datarows` includes the result set. + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{"query" : "source=accounts | fields firstname, lastname"}' +``` + +Expected output: + +```json +{ + "schema": [ + { + "name": "firstname", + "type": "string" + }, + { + "name": "lastname", + "type": "string" + } + ], + "datarows": [ + [ + "Amber", + "Duke" + ], + [ + "Hattie", + "Bond" + ], + [ + "Nanette", + "Bates" + ], + [ + "Dale", + "Adams" + ] + ], + "total": 4, + "size": 4 +} +``` + +### Example 2 + +If any error occurred, error message and the cause will be returned instead. + +```bash ppl +curl -sS -H 'Content-Type: application/json' \ +-X POST localhost:9200/_plugins/_ppl \ +-d '{"query" : "source=unknown | fields firstname, lastname"}' +``` + +Expected output: + +```json +{ + "error": { + "reason": "Error occurred in OpenSearch engine: no such index [unknown]", + "details": "[unknown] IndexNotFoundException[no such index [unknown]]\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.", + "type": "IndexNotFoundException" + }, + "status": 404 +} +``` + \ No newline at end of file diff --git a/docs/user/ppl/interfaces/protocol.rst b/docs/user/ppl/interfaces/protocol.rst deleted file mode 100644 index a76dba301b5..00000000000 --- a/docs/user/ppl/interfaces/protocol.rst +++ /dev/null @@ -1,137 +0,0 @@ -.. highlight:: sh - -======== -Protocol -======== - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - - -Introduction -============ - -For the protocol, PPL endpoint provides response formats in the JDBC format. JDBC format is widely used because it provides schema information and more functionality such as pagination. Besides JDBC driver, various clients can benefit from the detailed and well formatted response. - - -Request/Response Format -============== - -Description ------------ - -The body of HTTP POST request can take PPL query. - -Example 1 ---------- -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl \ - ... -d '{"query" : "source=accounts | fields firstname, lastname"}' - { - "schema": [ - { - "name": "firstname", - "type": "string" - }, - { - "name": "lastname", - "type": "string" - } - ], - "datarows": [ - [ - "Amber", - "Duke" - ], - [ - "Hattie", - "Bond" - ], - [ - "Nanette", - "Bates" - ], - [ - "Dale", - "Adams" - ] - ], - "total": 4, - "size": 4 - } - -JDBC Format -=========== - -Description ------------ - -By default the plugin return JDBC format. JDBC format is provided for JDBC driver and client side that needs both schema and result set well formatted. 
- -Example 1 ---------- - -Here is an example for normal response. The `schema` includes field name and its type and `datarows` includes the result set. - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl \ - ... -d '{"query" : "source=accounts | fields firstname, lastname"}' - { - "schema": [ - { - "name": "firstname", - "type": "string" - }, - { - "name": "lastname", - "type": "string" - } - ], - "datarows": [ - [ - "Amber", - "Duke" - ], - [ - "Hattie", - "Bond" - ], - [ - "Nanette", - "Bates" - ], - [ - "Dale", - "Adams" - ] - ], - "total": 4, - "size": 4 - } - -Example 2 ---------- - -If any error occurred, error message and the cause will be returned instead. - -PPL query:: - - sh$ curl -sS -H 'Content-Type: application/json' \ - ... -X POST localhost:9200/_plugins/_ppl \ - ... -d '{"query" : "source=unknown | fields firstname, lastname"}' - { - "error": { - "reason": "Error occurred in OpenSearch engine: no such index [unknown]", - "details": "[unknown] IndexNotFoundException[no such index [unknown]]\nFor more details, please send request for Json format to see the raw response from OpenSearch engine.", - "type": "IndexNotFoundException" - }, - "status": 404 - } - diff --git a/docs/user/ppl/limitations/limitations.md b/docs/user/ppl/limitations/limitations.md new file mode 100644 index 00000000000..6ef9bd7407b --- /dev/null +++ b/docs/user/ppl/limitations/limitations.md @@ -0,0 +1,89 @@ +# Limitations + +## Inconsistent Field Types across indices + +* If the same field has different types across indices (e.g., `field` is a `string` in one index and an `integer` in another), PPL selects a field type from one of the indices—this selection is non-deterministic. Fields with other types are ignored during query execution. +* For `object` fields, [PPL merges subfields from different indices to tolerate schema variations](https://github.com/opensearch-project/sql/issues/3625). + +## Unsupported OpenSearch Field Types + +PPL does not support all [OpenSearch data types](https://docs.opensearch.org/latest/mappings/supported-field-types/index/). (e.g., `flattened`, some complex `nested` usages). Unsupported fields are excluded from `DESCRIBE` and `SOURCE` outputs. At runtime: Queries referencing unsupported fields fail with semantic or resolution errors. Such fields are ignored in projections unless explicitly filtered out or removed at ingestion. + +| OpenSearch Data Type | PPL | +| --- | --- | +| knn_vector | Ignored | +| Range field types | Ignored | +| Object - flat_object | Ignored | +| Object - join | Ignored | +| String - Match-only text | Ignored | +| String - Wildcard | Ignored | +| String - token_count | Ignored | +| String - constant_keyword | Ignored | +| Autocomplete | Ignored | +| Geoshape | Ignored | +| Cartesian field types | Ignored | +| Rank field types | Ignored | +| Star-tree | Ignored | +| derived | Ignored | +| Percolator | Ignored | + +## Field Parameters + +For a field to be queryable in PPL, the following index settings must be enabled: + +| Setting | Description | Required For | +| --- | --- | --- | +| _source: true | Stores the original JSON document | Required for fetch raw data. 
| index: true | Enables field indexing | Required for filtering, search, and aggregations |
| doc_values: true | Enables columnar access for aggregations/sorting | Required for `stats`, `sort` |

## Nested Field Behavior

* There are [limitations](https://github.com/opensearch-project/sql/issues/52) regarding the nested levels and query types that need improvement.

## Multi-value Field Behavior

OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch responses, it expects data to match the declared type and does not account for data in array format. If the plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning scalar data types, allowing basic queries (e.g., source = tbl \| where condition). However, using multi-value fields in expressions or functions will result in exceptions. If this setting is disabled or absent, only the first element of an array is returned, preserving the default behavior.
## Unsupported Functionalities in Calcite Engine

Since 3.0.0, we introduce Apache Calcite as an experimental query engine. Please see [introduce v3 engine](../../../dev/intro-v3-engine.md).
For the following functionalities, the query will be forwarded to the V2 query engine. This means the following functionalities cannot work with new PPL commands/functions introduced in 3.0.0 and above.
* All SQL queries
* PPL queries against non-OpenSearch data sources
* `dedup` with `consecutive=true`
* Search relevant commands
  * AD
  * ML
  * Kmeans
* The `show datasources` command
* Commands with the `fetch_size` parameter


## Malformed Field Names in Object Fields

OpenSearch normally rejects field names containing problematic dot patterns (such as `.`, `..`, `.a`, `a.`, or `a..b`). However, when an object field has `enabled: false`, OpenSearch bypasses field name validation and allows storing documents with any field names.

If a document contains malformed field names inside an object field, PPL ignores those malformed field names. Other valid fields in the document are returned normally.

**Example of affected data:**

```json
{
  "log": {
    ".": "value1",
    ".a": "value2",
    "a.": "value3",
    "a..b": "value4"
  }
}
```

When `log` is an object field with `enabled: false`, subfields with malformed names are ignored.

**Recommendation:** Avoid using field names that contain leading dots, trailing dots, consecutive dots, or consist only of dots. This aligns with OpenSearch's default field naming requirements.
diff --git a/docs/user/ppl/limitations/limitations.rst b/docs/user/ppl/limitations/limitations.rst
deleted file mode 100644
index 41d3a007d23..00000000000
--- a/docs/user/ppl/limitations/limitations.rst
+++ /dev/null
@@ -1,132 +0,0 @@
-===========
-Limitations
-===========
-
-.. rubric:: Table of contents
-
-.. contents::
-   :local:
-   :depth: 2
-
-Inconsistent Field Types across indices
-=======================================
-
-* If the same field has different types across indices (e.g., ``field`` is a ``string`` in one index and an ``integer`` in another), PPL selects a field type from one of the indices—this selection is non-deterministic. Fields with other types are ignored during query execution.
-* For ``object`` fields, `PPL merges subfields from different indices to tolerate schema variations `_.
- -Unsupported OpenSearch Field Types -================================== - -PPL does not support all `OpenSearch data types `_. (e.g., ``flattened``, some complex ``nested`` usages). Unsupported fields are excluded from ``DESCRIBE`` and ``SOURCE`` outputs. At runtime: Queries referencing unsupported fields fail with semantic or resolution errors. Such fields are ignored in projections unless explicitly filtered out or removed at ingestion. - -+---------------------------+---------+ -| OpenSearch Data Type | PPL | -+===========================+=========+ -| knn_vector | Ignored | -+---------------------------+---------+ -| Range field types | Ignored | -+---------------------------+---------+ -| Object - flat_object | Ignored | -+---------------------------+---------+ -| Object - join | Ignored | -+---------------------------+---------+ -| String - Match-only text | Ignored | -+---------------------------+---------+ -| String - Wildcard | Ignored | -+---------------------------+---------+ -| String - token_count | Ignored | -+---------------------------+---------+ -| String - constant_keyword | Ignored | -+---------------------------+---------+ -| Autocomplete | Ignored | -+---------------------------+---------+ -| Geoshape | Ignored | -+---------------------------+---------+ -| Cartesian field types | Ignored | -+---------------------------+---------+ -| Rank field types | Ignored | -+---------------------------+---------+ -| Star-tree | Ignored | -+---------------------------+---------+ -| derived | Ignored | -+---------------------------+---------+ -| Percolator | Ignored | -+---------------------------+---------+ - -Field Parameters -================ - -For a field to be queryable in PPL, the following index settings must be enabled: - -+------------------+--------------------------------------------------+--------------------------------------------------+ -| Setting | Description | Required For | -+==================+==================================================+==================================================+ -| _source: true | Stores the original JSON document | Required for fetch raw data. | -+------------------+--------------------------------------------------+--------------------------------------------------+ -| index: true | Enables field indexing | Required for filtering, search, and aggregations | -+------------------+--------------------------------------------------+--------------------------------------------------+ -| doc_values: true | Enables columnar access for aggregations/sorting | Required for `stats`, `sort` | -+------------------+--------------------------------------------------+--------------------------------------------------+ - - -Nested Field Behavior -===================== - -* There are `limitations `_ regarding the nested levels and query types that needs improvement. - -Multi-value Field Behavior -========================== - -OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The -SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch -responses, it expects data to match the declared type and does not account for data in array format. If the -plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning -scalar data types, allowing basic queries (e.g., source = tbl | where condition). However, using multi-value -fields in expressions or functions will result in exceptions. 
If this setting is disabled or absent, only the -first element of an array is returned, preserving the default behavior. - -Unsupported Functionalities in Calcite Engine -============================================= - -Since 3.0.0, we introduce Apache Calcite as an experimental query engine. Please see `introduce v3 engine <../../../dev/intro-v3-engine.md>`_. -For the following functionalities, the query will be forwarded to the V2 query engine. It means following functionalities cannot work with new PPL commands/functions introduced in 3.0.0 and above. - -* All SQL queries - -* PPL Queries against non-OpenSearch data sources - -* ``dedup`` with ``consecutive=true`` - -* Search relevant commands - - * AD - * ML - * Kmeans - -* ``show datasources`` and command - -* Commands with ``fetch_size`` parameter - -Malformed Field Names in Object Fields -====================================== - -OpenSearch normally rejects field names containing problematic dot patterns (such as ``.``, ``..``, ``.a``, ``a.``, or ``a..b``). However, when an object field has ``enabled: false``, OpenSearch bypasses field name validation and allows storing documents with any field names. - -If a document contains malformed field names inside an object field, PPL ignores those malformed field names. Other valid fields in the document are returned normally. - -**Example of affected data:** - -.. code-block:: json - - { - "log": { - ".": "value1", - ".a": "value2", - "a.": "value3", - "a..b": "value4" - } - } - -When ``log`` is an object field with ``enabled: false``, subfields with malformed names are ignored. - -**Recommendation:** Avoid using field names that contain leading dots, trailing dots, consecutive dots, or consist only of dots. This aligns with OpenSearch's default field naming requirements. diff --git a/docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md b/docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md index 25d726f8be4..9111141078a 100644 --- a/docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md +++ b/docs/user/ppl/reference/splunk_to_ppl_cheat_sheet.md @@ -1,73 +1,73 @@ -# Splunk to OpenSearch PPL Cheat Sheet +# Splunk to OpenSearch PPL Cheat Sheet This cheat sheet helps Splunk users transition to OpenSearch's PPL. It maps common Splunk Search Processing Language (SPL) commands to their PPL equivalents with examples. 
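For instance (using a hypothetical `web_logs` index with `status` and `host` fields), the Splunk query `index=web_logs status>=500 | stats count by host` translates to:

```ppl
source=web_logs
| where status >= 500
| stats count() by host
```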
-## Structure and Concepts
-
+## Structure and Concepts
+
 | Aspect | Splunk SPL | OpenSearch PPL | Notes |
 |--------|------------|---------------|-------|
 | Query structure | `search terms \| command` | `search term source = index \| command` | PPL requires explicit source at the beginning |
-| Index reference | `index=name*` | `source=name*` | Different command to specify data source, [PPL support refering to multiple indices](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/general/identifiers.rst#multiple-indices)|
+| Index reference | `index=name*` | `source=name*` | Different command to specify data source, [PPL supports referring to multiple indices](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/general/identifiers.md#multiple-indices)|
 | Raw field | Special `_raw` field | Identify a field in your OpenSearch data that contains the text content you want to work with (often `message` or `content` fields in log data) | default field configured by the index.query.default_field setting (defaults to * which searches all fields) |
 | Time field | Special `_time` field | User-specified timestamp field | PPL use @timestamp by default |
+
 
-
-## Command Reference
+## Command Reference
 
 This table provides a mapping between Splunk SPL commands and their OpenSearch PPL equivalents:
-
+
 | Splunk SPL | OpenSearch PPL | Purpose |
 |------------|---------------|---------|
-| append | [append](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/append.rst) | Append results from subsearch |
-| appendcols | [appendcols](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/appendcol.rst) | Append columns from subsearch |
-| bin | [bin](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/bin.rst) | Group numeric values into bins |
-| bucket | [bin](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/bin.rst) | Group numeric values into bins |
-| dedup | [dedup](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/dedup.rst) | Remove duplicate results |
-| eval | [eval](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/eval.rst) | Calculate and create new fields |
-| eventstats | [eventstats](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/eventstats.rst) | Calculate statistics while preserving events |
-| mvexpand | [expand](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/expand.rst) | Expand multi-value fields |
-| fields | [fields](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/fields.rst) | Include or exclude fields |
-| fillnull | [fillnull](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/fillnull.rst) | Replace null values |
-| head | [head](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/head.rst) | Retrieve the first N results |
-| join | [join](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/join.rst) | Combine results from multiple sources |
-| lookup | [lookup](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/lookup.rst) | Enrich data with lookups |
-| rare | [rare](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/rare.rst) | Find the least common values |
-| regex | [regex](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/regex.rst) | Filter with regular expression pattern |
-| rename | 
[rename](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/rename.rst) | Rename fields in results | -| reverse | [reverse](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/reverse.rst) | Reverse the order of search results | -| rex | [rex](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/rex.rst) | Extract with regular expression pattern | -| search | [search](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/search.rst) | Basic searching of data | -| sort | [sort](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/sort.rst) | Sort results by specified fields | -| spath | [spath](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/spath.rst) | Extracting fields from structured text data | -| stats | [stats](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/stats.rst) | Statistical aggregation of data | -| subsearch | [subsearch](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/subquery.rst) | Enrich main search | -| table | [table](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/table.rst) | Select specific fields to display | -| timechart | [timechart](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/timechart.rst) | Statistical aggregation of time-series data | -| top | [top](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/top.rst) | Find the most common values | -| trendline | [trendline](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/trendline.rst) | Calculate moving averages of fields | -| where | [where](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/where.rst) | Filter results based on conditions | - - -## Example Query Conversions +| append | [append](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/append.md) | Append results from subsearch | +| appendcols | [appendcols](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/appendcol.md) | Append columns from subsearch | +| bin | [bin](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/bin.md) | Group numeric values into bins | +| bucket | [bin](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/bin.md) | Group numeric values into bins | +| dedup | [dedup](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/dedup.md) | Remove duplicate results | +| eval | [eval](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/eval.md) | Calculate and create new fields | +| eventstats | [eventstats](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/eventstats.md) | Calculate statistics while preserving events | +| mvexpand | [expand](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/expand.md) | Expand multi-value fields | +| fields | [fields](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/fields.md) | Include or exclude fields | +| fillnull | [fillnull](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/fillnull.md) | Replace null values | +| head | [head](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/head.md) | Retrieve the first N results | +| join | [join](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/join.md) | Combine results from multiple sources | +| lookup | 
[lookup](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/lookup.md) | Enrich data with lookups | +| rare | [rare](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/rare.md) | Find the least common values | +| regex | [regex](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/regex.md) | Filter with regular expression pattern | +| rename | [rename](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/rename.md) | Rename fields in results | +| reverse | [reverse](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/reverse.md) | Reverse the order of search results | +| rex | [rex](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/rex.md) | Extract with regular expression pattern | +| search | [search](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/search.md) | Basic searching of data | +| sort | [sort](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/sort.md) | Sort results by specified fields | +| spath | [spath](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/spath.md) | Extracting fields from structured text data | +| stats | [stats](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/stats.md) | Statistical aggregation of data | +| subsearch | [subsearch](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/subquery.md) | Enrich main search | +| table | [table](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/table.md) | Select specific fields to display | +| timechart | [timechart](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/timechart.md) | Statistical aggregation of time-series data | +| top | [top](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/top.md) | Find the most common values | +| trendline | [trendline](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/trendline.md) | Calculate moving averages of fields | +| where | [where](https://github.com/opensearch-project/sql/blob/main/docs/user/ppl/cmd/where.md) | Filter results based on conditions | + + +## Example Query Conversions **Simple search:** -- Splunk: `error failed status=500` -- PPL: ```source=`*` error failed status=500``` +- Splunk: `error failed status=500` +- PPL: ```source=`*` error failed status=500``` **Aggregation:** -- Splunk: `... | stats count by host, status | sort -count` -- PPL: `... | stats count() by host, status | sort - count` +- Splunk: `... | stats count by host, status | sort -count` +- PPL: `... | stats count() by host, status | sort - count` **Time-based query:** -- Splunk: `... | timechart span=1h count by host` -- PPL: `... | timechart span=1h count by host` +- Splunk: `... | timechart span=1h count by host` +- PPL: `... | timechart span=1h count by host` **Complex calculation:** -- Splunk: `... | eval mb=bytes/1024/1024 | stats avg(mb) AS avg_mb by host | where avg_mb > 100` -- PPL: `... | eval mb=bytes/1024/1024 | stats avg(mb) as avg_mb by host | where avg_mb > 100` - -## Basic Search Syntax +- Splunk: `... | eval mb=bytes/1024/1024 | stats avg(mb) AS avg_mb by host | where avg_mb > 100` +- PPL: `... 
| eval mb=bytes/1024/1024 | stats avg(mb) as avg_mb by host | where avg_mb > 100` +## Basic Search Syntax + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Basic search | `error` | `error` | Same syntax | @@ -77,26 +77,26 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Multiple values | `field IN (404, 503)` | `field in (404, 503)` | Same syntax | | Field doesn't equal | `field!=404` | `field!=404` | Same syntax | | Wildcard search | `field=value*` | `field=value*` | Same syntax | - -## Field Selection and Manipulation - + +## Field Selection and Manipulation + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Select fields | `... \| fields field1, field2` | `... \| fields field1, field2` | Same syntax | | Exclude fields | `... \| fields - field3` | `... \| fields - field3` | Same syntax | | Rename fields | `... \| rename field1 AS new_name` | `... \| rename field1 as new_name` | Same syntax | | Calculate field | `... \| eval new_field=field1 + field2` | `... \| eval new_field = field1 + field2` | Same syntax | - -## Filtering - + +## Filtering + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Filter results | `... \| where field > 100` | `... \| where field > 100` | Same syntax | | Compound filter | `... \| where field1=200 OR field2=203` | `... \| where field1=200 or field2=203` | Same syntax | + - -## Aggregation - +## Aggregation + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Count | `... \| stats count` | `... \| stats count` | Same syntax | @@ -105,9 +105,9 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Distinct count | `... \| stats dc(field)` | `... \| stats dc(field)` | Same syntax | | Min/Max | `... \| stats min(field), max(field)` | `... \| stats min(field), max(field)` | Same syntax | | Percentiles | `... \| stats perc95(field)` | `... \| stats perc95(field)` | Same syntax | - -## Sorting and Limiting - + +## Sorting and Limiting + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Sort ascending | `... \| sort field` | `... \| sort field` | Same syntax | @@ -115,9 +115,9 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Sort multiple | `... \| sort field1, -field2` | `... \| sort field1, -field2` | Same syntax | | Limit results | `... \| head 10` | `... \| head 10` | Same syntax | | Get last results | `... \| tail 10` | `... \| tail 10` | Same syntax | - -## Rex vs Parse - + +## Rex vs Parse + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Basic extraction | `... \| rex field=address "(?\d+) (?.+)"` | `... \| rex address "(?\d+) (?.+)"` | Same syntax | @@ -125,24 +125,24 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Search and replace mode | `... \| rex field=address mode=sed "s/\d+//g"` | `... \| rex field=address mode=sed "s/\d+//g"` | Same syntax | | Field override | `... \| rex field=address "(?
<fieldname>.+)"` | `... \| rex address "(?<fieldname>
    .+)"` | Same syntax | | Default field (_raw) | `... \| rex "(?\d+) (?.+)"` | Not supported | PPL does not support implicit _raw field and requires explicit field specification | - -## Time Functions - + +## Time Functions + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Relative time | `earliest=-1d latest=now()` | `earliest("-1d", @timestamp) and latest("now", @timestamp)` | PPL supports earliest() and latest() functions | | Time extraction | `... \| eval hour=strftime(now(), "%H")` | `... \| eval hour = strftime(now(), '%H')` | Same syntax | | Time bucket | `... \| bin _time span=5m \| stats count by _time` | `... \| stats count by span(@timestamp, 5m)` | PPL uses `span()` | - -## Dedup - + +## Dedup + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Deduplicate | `... \| dedup field1, field2` | `... \| dedup field1, field2` | Same syntax | | Deduplicate with count | `... \| dedup 2 field1` | `... \| dedup 2 field1` | Same syntax | - -## Lookup and Joins - + +## Lookup and Joins + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Basic lookup | `... \| lookup vendors product_id` | `... \| lookup vendors product_id` | Same syntax | @@ -154,25 +154,25 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Left join | `... \| join type=left vendors [search index=vendors]` | `... \| left join vendors` | Different syntax format | | Join with ON clause | `... \| join type=inner left=a right=b where a.id = b.id vendors` | `... \| inner join left=a right=b ON a.id = b.id vendors` | PPL uses "ON" instead of "where" | | Append columns | `... \| appendcols [search source=other_index \| fields id, status]` | `... \| appendcols [source=other_index \| fields id, status]` | Similar syntax | - -## Field Manipulation - + +## Field Manipulation + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Include fields | `... \| fields field1, field2` | `... \| fields field1, field2` | Same syntax | | Exclude fields | `... \| fields - field3` | `... \| fields - field3` | Same syntax | | Rename fields | `... \| rename field1 as new_name` | `... \| rename field1 as new_name` | PPL uses lowercase "as" | | Replace null values | `... \| fillnull value=0 field1, field2` | `... \| fillnull with 0 in field1, field2` | Similar syntax but different format | - -## Handling Null Values - + +## Handling Null Values + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Basic null replacement | `... \| fillnull value=0 field1` | `... \| fillnull with 0 in field1` | Similar syntax but uses `with...in` format | | Multiple fields | `... \| fillnull value="N/A" field1, field2, field3` | `... \| fillnull with 'N/A' in field1, field2, field3` | Similar syntax but uses `with...in` format | - -## Results Limiting - + +## Results Limiting + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | First N results | `... \| head 10` | `... \| head 10` | Same syntax | @@ -180,9 +180,9 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Moving average | `... \| trendline sma5(value)` | `... \| trendline sma5(value)` | Same syntax | | Top values | `... \| top 10 field` | `... \| top 10 field` | Same syntax | | Rare values | `... \| rare 10 field` | `... 
\| rare 10 field` | Same syntax | - -## String Functions - + +## String Functions + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | String concatenation | `... \| eval result=field1 + " " + field2` | `... \| eval result = concat(field1, ' ', field2)` | PPL requires `concat()` function | @@ -193,9 +193,9 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Replace | `... \| eval result=replace(field, "pattern", "replacement")` | `... \| eval result = replace(field, 'pattern', 'replacement')` | Same syntax | | Trim whitespace | `... \| eval result=trim(field)` | `... \| eval result = trim(field)` | Same syntax | | Contains (wildcard) | `... \| eval result=like(field, "%pattern%")` | `... \| eval result = like(field, '%pattern%')` | Same syntax | - -## Conditional Functions - + +## Conditional Functions + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | If condition | `... \| eval result=if(field > 100, "High", "Low")` | `... \| eval result = if(field > 100, 'High', 'Low')` | Same syntax | @@ -203,9 +203,9 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | NULL check | `... \| eval result=if(isnull(field), "Missing", field)` | `... \| eval result = if(isnull(field), 'Missing', field)` | Same syntax | | Empty check | `... \| eval result=if(isnotnull(field), field, "Default")` | `... \| eval result = if(isnotnull(field), field, 'Default')` | Same syntax | | Coalesce (first non-null) | `... \| eval result=coalesce(field1, field2, "default")` | `... \| eval result = coalesce(field1, field2, 'default')` | Same syntax | - -## Math Functions - + +## Math Functions + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Addition | `... \| eval sum=field1 + field2` | `... \| eval sum = field1 + field2` | Same syntax | @@ -219,9 +219,9 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Floor | `... \| eval result=floor(field)` | `... \| eval result = floor(field)` | Same syntax | | Power | `... \| eval result=pow(field, 2)` | `... \| eval result = pow(field, 2)` | Same syntax | | Square root | `... \| eval result=sqrt(field)` | `... \| eval result = sqrt(field)` | Same syntax | - -## Date and Time Functions - + +## Date and Time Functions + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | Current time | `... \| eval now=now()` | `... \| eval now = now()` | Same syntax | @@ -230,11 +230,12 @@ This table provides a mapping between Splunk SPL commands and their OpenSearch P | Day ago | `... \| eval yesterday=relative_time(now(), "-1d")` | `... \| eval yesterday = date_sub(now(), INTERVAL 1 DAY)` | PPL uses interval syntax | | Day ahead | `... \| eval tomorrow=relative_time(now(), "+1d")` | `... \| eval tomorrow = date_add(now(), INTERVAL 1 DAY)` | PPL uses interval syntax | | Time difference | `... \| eval diff=(_time2 - _time1)` | `... \| eval diff = date_diff('second', timestamp1, timestamp2)` | PPL uses function | - -## Other Functions - + +## Other Functions + | Operation | Splunk SPL | OpenSearch PPL | Notes | |-----------|------------|---------------|-------| | MD5 hash | Not native | `... \| eval hash = md5('string')` | PPL-specific feature | | SHA1 hash | Not native | `... \| eval hash = sha1('string')` | PPL-specific feature | | JSON extraction | `... 
\| spath input=data path=user.name output=username` | `... \| eval username = json_extract(data, '$.user.name')` | Different approach | + \ No newline at end of file diff --git a/doctest/markdown_parser.py b/doctest/markdown_parser.py new file mode 100644 index 00000000000..00ea5441d49 --- /dev/null +++ b/doctest/markdown_parser.py @@ -0,0 +1,286 @@ +""" +Markdown-based doctest parser for clean copy-paste documentation. + +Parses Markdown code fences instead of RST directives. +""" + +import inspect +import re +from pathlib import Path +from typing import Callable, List, Optional, Tuple, Union +import doctest + + +class MarkdownDocTestParser: + """ + Parses Markdown files looking for paired code blocks: + 1. Input code block (sql, ppl, sh, bash, bash ppl) + 2. Output code block (text, console, json, yaml, or output) + + Example Markdown format: + + ```sql + SELECT * FROM accounts + ``` + + ```text + +------+ + | name | + +------+ + | John | + +------+ + ``` + """ + + # Regex to match Markdown code fences with optional attributes + CODE_FENCE_PATTERN = re.compile( + r'^```(\w+)([^\n]*?)\s*\n' # ```language [attributes] (no newlines in attributes) + r'(.*?)' # code content (non-greedy) + r'^```\s*$', # closing ``` + re.MULTILINE | re.DOTALL + ) + + def __init__(self, input_languages: Optional[List[str]] = None, + output_languages: Optional[List[str]] = None, + transform: Optional[Callable] = None) -> None: + """ + Args: + input_languages: List of languages for input blocks (e.g., ['sql', 'ppl']) + output_languages: List of languages for output blocks (e.g., ['text', 'console']) + transform: Function to transform input code before execution + """ + self.input_languages = input_languages or ['sql', 'ppl', 'bash', 'sh', 'bash ppl'] + self.output_languages = output_languages or ['text', 'console', 'output', 'json', 'yaml'] + self.transform = transform or (lambda x: x) + + def parse(self, text: str, name: str = '') -> doctest.DocTest: + """ + Parse Markdown text and extract test cases from code fence pairs. + + Returns a DocTest object compatible with doctest.DocTestRunner. + """ + examples = [] + blocks = self._extract_code_blocks(text) + + # Find pairs of input/output blocks + i = 0 + while i < len(blocks) - 1: + lang1, code1, lineno1 = blocks[i] + lang2, code2, lineno2 = blocks[i + 1] + + # Check if this is an input/output pair + if lang1 in self.input_languages and lang2 in self.output_languages: + # Create a doctest example + source = code1.rstrip('\n') + want = code2.rstrip('\n') + '\n' # doctest expects trailing newline + + # Apply transform to source + if callable(self.transform): + # Check if transform accepts language parameter + sig = inspect.signature(self.transform) + if len(sig.parameters) > 1: + transformed_source = self.transform(source, lang1) + else: + transformed_source = self.transform(source) + else: + transformed_source = source + + example = doctest.Example( + source=transformed_source, + want=want, + lineno=lineno1, + indent=0, + options={} + ) + examples.append(example) + + # Skip the output block since we've paired it + i += 2 + else: + # Not a pair, move to next block + i += 1 + + return doctest.DocTest( + examples=examples, + globs={}, + name=name, + filename=name, + lineno=0, + docstring=text + ) + + def get_doctest(self, docstring: str, globs: dict, name: str, filename: str, lineno: int) -> doctest.DocTest: + """ + Extract a DocTest object from the given docstring. + This method is required for compatibility with DocFileSuite. 
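+
+        Illustrative call (hypothetical path; the docstring argument is unused
+        here because the file is re-read from disk):
+        parser.get_doctest('', {}, name='conversion.md', filename='docs/user/ppl/functions/conversion.md', lineno=0)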
+ """ + # Read the file content + content = Path(filename).read_text(encoding='utf-8') + + # Parse the markdown content and update globs + doctest_obj = self.parse(content, name=filename) + doctest_obj.globs.update(globs) + return doctest_obj + + def _extract_code_blocks(self, text: str) -> List[Tuple[str, str, int]]: + """ + Extract all code blocks from Markdown text, skipping those with 'ignore' attribute. + + Returns list of (language, code, line_number) tuples. + """ + blocks = [] + for match in self.CODE_FENCE_PATTERN.finditer(text): + language = match.group(1).lower() + attributes = match.group(2) or "" + code = match.group(3) + lineno = text[:match.start()].count('\n') + 1 + + # Skip blocks with 'ignore' attribute + if "ignore" in attributes: + continue + + blocks.append((language, code, lineno)) + + return blocks + + +def create_markdown_suite(filepath: Union[str, Path], transform: Optional[Callable] = None, + setup: Optional[Callable] = None, globs: Optional[dict] = None) -> doctest.DocTestSuite: + """ + Create a test suite from a Markdown file. + + Args: + filepath: Path to Markdown file + transform: Function to transform input code + setup: Setup function to run before tests + globs: Global variables for test execution + + Returns: + doctest.DocTestSuite + """ + parser = MarkdownDocTestParser(transform=transform) + + content = Path(filepath).read_text(encoding='utf-8') + + doctest_obj = parser.parse(content, name=str(filepath)) + + # Set up globs if provided + if globs: + doctest_obj.globs.update(globs) + + # Create a test case + test = doctest.DocTestCase( + doctest_obj, + optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS, + setUp=setup + ) + + return doctest.DocTestSuite(test_finder=lambda: [doctest_obj]) + + +# Transform functions for different languages +def sql_markdown_transform(code: str, lang: str = "sql") -> str: + """Transform SQL code for execution.""" + return f'sql_cmd.process({repr(code.strip().rstrip(";"))})' + + +def ppl_markdown_transform(code: str, lang: str = "ppl") -> str: + """Transform PPL code for execution.""" + # Join multi-line PPL queries into a single line + # Remove leading/trailing whitespace and join lines with space + single_line = " ".join( + line.strip() for line in code.strip().split("\n") if line.strip() + ) + return f'ppl_cmd.process({repr(single_line.rstrip(";"))})' + + +def bash_markdown_transform(code: str, lang: str = "bash") -> str: + """Transform bash code for execution.""" + if code.strip().startswith("opensearchsql"): + match = re.search(r'opensearchsql\s+-q\s+"(.*?)"', code) + if match: + query = match.group(1) + return f'cmd.process({repr(query.strip().rstrip(";"))})' + return f'pretty_print(sh("""{code}""").stdout.decode("utf-8"))' + + +def bash_ppl_markdown_transform(code: str, lang: str = "bash ppl") -> str: + """Transform bash ppl code for execution (curl commands with PPL queries).""" + return f'pretty_print(sh("""{code}""").stdout.decode("utf-8"))' + + +def mixed_ppl_transform(code: str, lang: str = "ppl") -> str: + """Mixed transform that handles both ppl and bash ppl.""" + if lang == "bash ppl" or "curl" in code.lower(): + return bash_ppl_markdown_transform(code, lang) + else: + return ppl_markdown_transform(code, lang) + + +def detect_markdown_format(filepath: Union[str, Path]) -> bool: + """ + Check if a file uses Markdown code fences. 
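+
+    Only opening fences with a language tag count, e.g. ```ppl or ```sql at
+    the start of a line; RST literal blocks and bare ``` fences do not match.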
+ + Returns: + True if file uses ```language``` code fences + False otherwise + """ + content = Path(filepath).read_text(encoding='utf-8') + + # Check for Markdown code fences + return bool(re.search(r'^```\w+\s*\n', content, re.MULTILINE)) + + +def create_hybrid_markdown_suite(filepaths: List[Union[str, Path]], doc_type: str, + setup_func: Optional[Callable] = None) -> doctest.DocTestSuite: + """ + Create test suite for Markdown files. + + Args: + filepaths: List of Markdown file paths + doc_type: 'sql', 'ppl', or 'bash' + setup_func: Setup function to initialize test environment + + Returns: + doctest.DocTestSuite + """ + # Choose transform based on doc type + if 'sql' in doc_type: + transform = sql_markdown_transform + input_langs = ['sql'] + elif 'ppl' in doc_type: + transform = ppl_markdown_transform + input_langs = ['ppl'] + else: # bash + transform = bash_markdown_transform + input_langs = ['bash', 'sh'] + + parser = MarkdownDocTestParser( + input_languages=input_langs, + output_languages=['text', 'console', 'output'], + transform=transform + ) + + all_tests = [] + + for filepath in filepaths: + content = Path(filepath).read_text(encoding='utf-8') + + doctest_obj = parser.parse(content, name=str(filepath)) + + # Only add if there are examples + if doctest_obj.examples: + all_tests.append(doctest_obj) + + # Create test suite + def setUp(test): + if setup_func: + setup_func(test) + + suite = doctest.DocTestSuite( + test_finder=lambda: all_tests, + setUp=setUp, + optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS + ) + + return suite diff --git a/doctest/test_docs.py b/doctest/test_docs.py index d3cea5782b5..e57c41d6827 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -13,6 +13,7 @@ import unittest from concurrent.futures import ThreadPoolExecutor, as_completed from functools import partial +from typing import List import click import zc.customdoctests @@ -20,6 +21,15 @@ from opensearch_sql_cli.opensearch_connection import OpenSearchConnection from opensearch_sql_cli.utils import OutputSettings from opensearchpy import OpenSearch, helpers +from markdown_parser import mixed_ppl_transform + + +# Import Markdown parser +from markdown_parser import ( + MarkdownDocTestParser, + ppl_markdown_transform, + sql_markdown_transform, +) ENDPOINT = "http://localhost:9200" @@ -100,11 +110,11 @@ def requires_calcite(doc_category): class CategoryManager: - + def __init__(self, category_file_path='../docs/category.json'): self._categories = self.load_categories(category_file_path) self._all_docs_cache = None - + def load_categories(self, file_path): try: with open(file_path) as json_file: @@ -117,48 +127,87 @@ def load_categories(self, file_path): for category, docs in categories.items() } debug(f"Loaded {len(categories)} categories from {file_path}") + + # Validate markdown-only categories + for category_name in categories.keys(): + self._validate_category_files(category_name, categories[category_name]) + return categories except Exception as e: raise Exception(f"Failed to load categories from {file_path}: {e}") - + + def _validate_category_files(self, category_name, docs): + """Internal method to validate category files during loading.""" + if self.is_markdown_category(category_name): + # Markdown-only categories should not contain .rst files + rst_files = [doc for doc in docs if doc.endswith(".rst")] + if rst_files: + raise Exception( + f"Only markdown files supported for category: {category_name}" + ) + debug( + f"Category {category_name} validation passed - all files are 
markdown" + ) + else: + # Non-markdown categories should only contain .rst files + md_files = [doc for doc in docs if doc.endswith(".md")] + if md_files: + raise Exception( + f"Only .rst files supported for category: {category_name}. Markdown not yet supported." + ) + debug(f"Category {category_name} validation passed - all files are .rst") + def get_all_categories(self): return list(self._categories.keys()) - + def get_category_files(self, category_name): return self._categories.get(category_name, []) - + def get_all_docs(self): if self._all_docs_cache is None: self._all_docs_cache = [] for category_name, docs in self._categories.items(): self._all_docs_cache.extend(docs) return self._all_docs_cache - + def find_file_category(self, file_path): # Convert to relative path from docs root if file_path.startswith('../docs/'): rel_path = file_path[8:] # Remove '../docs/' prefix else: rel_path = file_path - + for category_name, docs in self._categories.items(): if rel_path in docs: debug(f"Found file {rel_path} in category {category_name}") return category_name - + # Fallback to path-based detection debug(f"File {rel_path} not found in categories, using path-based detection") return detect_doc_type_from_path(file_path) - + def requires_calcite(self, category_name): - return category_name.endswith('_calcite') - + return category_name.endswith("_calcite") + + def is_markdown_category(self, category_name): + """Check if category uses Markdown files.""" + return category_name in ("ppl_cli_calcite", "bash_calcite", "bash_settings") + + def validate_category_files(self, category_name): + """Validate that categories contain only the correct file types. + + Markdown categories should only contain .md files. + Non-markdown categories should only contain .rst files. 
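+
+        For example, validate_category_files('ppl_cli_calcite') raises if that
+        category's file list contains any .rst entry, since it is markdown-only.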
+ """ + docs = self.get_category_files(category_name) + self._validate_category_files(category_name, docs) + def get_setup_function(self, category_name): if self.requires_calcite(category_name): return set_up_test_indices_with_calcite else: return set_up_test_indices_without_calcite - + def get_parser_for_category(self, category_name): if category_name.startswith('bash'): return bash_parser @@ -169,11 +218,21 @@ def get_parser_for_category(self, category_name): else: # Default fallback return sql_cli_parser - + def find_matching_files(self, search_filename): - if not search_filename.endswith('.rst'): - search_filename += '.rst' - + # Support both .rst and .md extensions + if not search_filename.endswith(".rst") and not search_filename.endswith(".md"): + # Try both extensions + all_docs = self.get_all_docs() + matches = [ + doc + for doc in all_docs + if doc.endswith(search_filename + ".rst") + or doc.endswith(search_filename + ".md") + or doc.endswith(search_filename) + ] + return matches + all_docs = self.get_all_docs() matches = [doc for doc in all_docs if doc.endswith(search_filename)] return matches @@ -189,22 +248,33 @@ def __init__(self, query_language="sql", endpoint=ENDPOINT): def process(self, statement): debug(f"Executing {self.query_language.upper()} query: {statement}") - - data = self.execute_query(statement, use_console=False) - debug(f"Query result: {data}") - - if data is None: - debug("Query returned None - this may indicate an error or unsupported function") - print("Error: Query returned no data") - return - - output = self.formatter.format_output(data) - output = "\n".join(output) - click.echo(output) + + try: + data = self.execute_query(statement, use_console=False) + debug(f"Query result: {data}") + + if data is None: + debug( + "Query returned None - this may indicate an error or unsupported function" + ) + print("Error: Query returned no data") + return + + output = self.formatter.format_output(data) + output = "\n".join(output) + click.echo(output) + except Exception as e: + # Print detailed error information + print(f"Error executing query: {statement}") + print(f"Error type: {type(e).__name__}") + print(f"Error message: {str(e)}") + if hasattr(e, "info"): + print(f"Error info: {e.info}") + raise class CalciteManager: - + @staticmethod def set_enabled(enabled): import requests @@ -216,16 +286,17 @@ def set_enabled(enabled): response = requests.put(f"{ENDPOINT}/_plugins/_query/settings", json=calcite_settings, timeout=10) - + if response.status_code != 200: raise Exception(f"Failed to set Calcite setting: {response.status_code} {response.text}") -class TestDataManager: - + +class DataManager: + def __init__(self): - self.client = OpenSearch([ENDPOINT], verify_certs=True) + self.client = OpenSearch([ENDPOINT], verify_certs=True, timeout=60) self.is_loaded = False - + def load_file(self, filename, index_name): mapping_file_path = './test_mapping/' + filename if os.path.isfile(mapping_file_path): @@ -298,7 +369,7 @@ def bash_transform(s): def get_test_data_manager(): global test_data_manager if test_data_manager is None: - test_data_manager = TestDataManager() + test_data_manager = DataManager() return test_data_manager @@ -365,12 +436,70 @@ def create_cli_suite(filepaths, parser, setup_func): setUp=setup_func ) + +def create_markdown_suite(filepaths, category_name, setup_func): + """ + Create test suite for Markdown files. 
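+
+    Called from get_test_suite for markdown categories, e.g. (sketch):
+    create_markdown_suite(files, 'ppl_cli_calcite', set_up_test_indices_with_calcite)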
+ + Args: + filepaths: List of Markdown file paths + category_name: Category name (e.g., 'ppl_cli_calcite') + setup_func: Setup function to initialize test environment + + Returns: + doctest.DocTestSuite + """ + + # Determine transform based on category + if "sql" in category_name: + transform = sql_markdown_transform + input_langs = ["sql"] + elif "ppl" in category_name: + transform = mixed_ppl_transform + input_langs = ["ppl", "bash ppl"] + elif "bash" in category_name: + transform = mixed_ppl_transform + input_langs = ["bash", "bash ppl", "sh"] + else: + # Default to PPL + transform = mixed_ppl_transform + input_langs = ["ppl", "sql", "bash ppl"] + + parser = MarkdownDocTestParser( + input_languages=input_langs, + output_languages=["text", "console", "output", "json", "yaml"], + transform=transform, + ) + + # Prepare globs for bash commands + test_globs = {} + if "bash" in category_name: + test_globs = { + "sh": partial( + subprocess.run, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + timeout=60, + shell=True, + ), + "pretty_print": pretty_print, + } + + return docsuite( + *filepaths, + parser=parser, + setUp=setup_func, + globs=test_globs, + ) + + # Entry point for unittest discovery def load_tests(loader, suite, ignore): tests = [] settings_tests = [] category_manager = CategoryManager() - + for category_name in category_manager.get_all_categories(): docs = category_manager.get_category_files(category_name) if not docs: @@ -381,7 +510,6 @@ def load_tests(loader, suite, ignore): settings_tests.append(suite) else: tests.append(suite) - random.shuffle(tests) if settings_tests: random.shuffle(settings_tests) @@ -391,7 +519,9 @@ def load_tests(loader, suite, ignore): def get_test_suite(category_manager: CategoryManager, category_name, filepaths): setup_func = category_manager.get_setup_function(category_name) - if category_name.startswith('bash'): + if category_manager.is_markdown_category(category_name): + return create_markdown_suite(list(filepaths), category_name, setup_func) + elif category_name.startswith("bash"): return create_bash_suite(filepaths, setup_func) else: parser = category_manager.get_parser_for_category(category_name) @@ -399,21 +529,21 @@ def get_test_suite(category_manager: CategoryManager, category_name, filepaths): def list_available_docs(category_manager: CategoryManager): categories = category_manager.get_all_categories() - + print(f"Available documentation files for testing:\n") - + total = 0 - for category_name in categories.items(): + for category_name in categories: files = category_manager.get_category_files(category_name) total += len(files) print(f"{category_name} docs ({len(files)} files):\n") for doc in sorted(files): print(f" ../docs/{doc}\n") - + print(f"Total: {total} documentation files available for testing\n") -def resolve_files(category_manager: CategoryManager, file_paths: list[str]): +def resolve_files(category_manager: CategoryManager, file_paths: List[str]): result = [] for file_param in file_paths: resolved_files = category_manager.find_matching_files(file_param) @@ -444,7 +574,7 @@ def main(): - If a filename matches multiple files, all matches will be executed """ ) - + parser.add_argument('file_paths', nargs='*', help='Path(s) to the documentation file(s) to test') parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose output with detailed diff information') @@ -452,14 +582,14 @@ def main(): help='Custom OpenSearch endpoint (default: http://localhost:9200)') 
parser.add_argument('--list', '-l', action='store_true', help='List all available documentation files') - + args = parser.parse_args() category_manager = CategoryManager() if args.list: list_available_docs(category_manager) return - + if not args.file_paths: print("No specific files provided. Running full doctest suite...") unittest.main(module=None, argv=['test_docs.py'], exit=False) @@ -467,8 +597,8 @@ def main(): if args.endpoint: global ENDPOINT - ENDPOINT = endpoint - print(f"Using custom endpoint: {endpoint}") + ENDPOINT = args.endpoint + print(f"Using custom endpoint: {args.endpoint}") all_files_to_test = resolve_files(category_manager, args.file_paths) @@ -483,5 +613,6 @@ def main(): sys.exit(0 if all_success else 1) + if __name__ == '__main__': main() diff --git a/prometheus/src/test/java/org/opensearch/sql/prometheus/storage/PrometheusStorageFactoryTest.java b/prometheus/src/test/java/org/opensearch/sql/prometheus/storage/PrometheusStorageFactoryTest.java index 7b1e2dec0f7..3f465b96d2b 100644 --- a/prometheus/src/test/java/org/opensearch/sql/prometheus/storage/PrometheusStorageFactoryTest.java +++ b/prometheus/src/test/java/org/opensearch/sql/prometheus/storage/PrometheusStorageFactoryTest.java @@ -130,7 +130,7 @@ void testGetStorageEngineWithWrongAuthType() { .thenReturn(Collections.emptyList()); PrometheusStorageFactory prometheusStorageFactory = new PrometheusStorageFactory(settings); HashMap properties = new HashMap<>(); - properties.put("prometheus.uri", "https://test.com"); + properties.put("prometheus.uri", "https://opensearch.org"); properties.put("prometheus.auth.type", "random"); properties.put("prometheus.auth.region", "us-east-1"); properties.put("prometheus.auth.secret_key", "accessKey"); @@ -150,7 +150,7 @@ void testGetStorageEngineWithNONEAuthType() { .thenReturn(Collections.emptyList()); PrometheusStorageFactory prometheusStorageFactory = new PrometheusStorageFactory(settings); HashMap properties = new HashMap<>(); - properties.put("prometheus.uri", "https://test.com"); + properties.put("prometheus.uri", "https://opensearch.org"); StorageEngine storageEngine = prometheusStorageFactory.getStorageEngine(properties); Assertions.assertTrue(storageEngine instanceof PrometheusStorageEngine); } diff --git a/scripts/docs_exporter/convert_rst_to_md.py b/scripts/docs_exporter/convert_rst_to_md.py new file mode 100644 index 00000000000..2a27b179fe6 --- /dev/null +++ b/scripts/docs_exporter/convert_rst_to_md.py @@ -0,0 +1,536 @@ +#!/usr/bin/env python3 +""" +Convert RST PPL documentation to Markdown format. + +This script converts RST files with os> prompts to Markdown with clean code fences. 
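+
+Usage (mirrors main() below; --batch defaults to docs/user/ppl/cmd):
+    python convert_rst_to_md.py input.rst [output.md]
+    python convert_rst_to_md.py --batch [directory]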
+""" + +import re +import sys +from pathlib import Path +from typing import Dict, List, Optional, Tuple + + +def convert_rst_table_to_markdown(table_lines: List[str]) -> Optional[str]: + """Convert RST grid table to Markdown table.""" + # Extract rows (lines starting with |) + rows = [line for line in table_lines if line.strip().startswith('|')] + + if len(rows) < 2: + return None + + # Parse cells from each row + parsed_rows = [] + for row in rows: + # Split by | and clean up + cells = [cell.strip() for cell in row.split('|')[1:-1]] + parsed_rows.append(cells) + + # Build markdown table + md_table = [] + + # Header row + md_table.append('| ' + ' | '.join(parsed_rows[0]) + ' |') + + # Separator row + md_table.append('| ' + ' | '.join(['---'] * len(parsed_rows[0])) + ' |') + + # Data rows + for row in parsed_rows[1:]: + md_table.append('| ' + ' | '.join(row) + ' |') + + return '\n'.join(md_table) + + +def convert_inline_code(text: str) -> str: + """Convert RST inline code ``code`` to Markdown `code`.""" + # Special case: ```*``` (three backticks) renders as `*` with backticks visible + # Convert ```*``` to `` `*` `` in Markdown + text = re.sub(r'```([^`]+)```', r'`` `\1` ``', text) + + # Convert regular ``code`` to `code` + text = re.sub(r'``([^`]+)``', r'`\1`', text) + return text + + +def convert_links(text: str) -> str: + """Convert RST links to Markdown links.""" + # Convert `link text `_ to [link text](url) + # Also convert .rst to .md for internal links + def replace_link(match): + link_text = match.group(1).strip() + url = match.group(2) + # Convert .rst to .md for all links (including GitHub URLs and anchors) + url = re.sub(r'\.rst(#|$)', r'.md\1', url) + return f'[{link_text}]({url})' + + # More specific regex: match backtick, non-greedy text, space, , backtick, underscore + text = re.sub(r'`([^`<]+?)\s*<([^>]+)>`_', replace_link, text) + + # Convert section references `Section Name`_ to [Section Name](#section-name) + # These are internal anchor links to headings in the same document + def replace_section_ref(match): + section_name = match.group(1) + # Convert to lowercase and replace spaces with hyphens for anchor + # Keep underscores as they are (don't convert to hyphens) + anchor = section_name.lower().replace(' ', '-') + # Remove special characters that aren't valid in anchors (but keep underscores and hyphens) + anchor = re.sub(r'[^\w\-]', '', anchor) + return f'[{section_name}](#{anchor})' + + text = re.sub(r'`([^`<]+)`_', replace_section_ref, text) + + return text + + +def convert_heading(line: str, next_line: Optional[str], heading_map: Dict[str, int]) -> Optional[str]: + """Convert RST heading to Markdown. 
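+
+    Levels are assigned in order of first appearance: the first underline
+    character seen becomes H2, the next new character becomes H3, and so on,
+    so a section underlined with '=' converts to '## Section'.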
+ + Args: + line: The heading text + next_line: The underline + heading_map: Dict mapping underline chars to heading levels + + Returns: + Markdown heading string or None + """ + if not next_line: + return None + + # Detect underline character + underline_char = None + if re.match(r'^=+$', next_line): + underline_char = '=' + elif re.match(r'^-+$', next_line): + underline_char = '-' + elif re.match(r'^\^+$', next_line): + underline_char = '^' + elif re.match(r'^~+$', next_line): + underline_char = '~' + elif re.match(r'^>+$', next_line): + underline_char = '>' + elif re.match(r'^`+$', next_line): + underline_char = '`' + + if underline_char: + # Assign level based on first appearance (after title) + if underline_char not in heading_map: + # Start at H2 (H1 is reserved for title with overline) + heading_map[underline_char] = len(heading_map) + 2 + + level = heading_map[underline_char] + prefix = '#' * level + return f"{prefix} {line}\n" + + return None + + +def parse_os_block(block_content: str) -> Tuple[str, str]: + """Parse a block with os>, PPL>, or > prompts into command and output.""" + lines = block_content.strip().split('\n') + + command_lines = [] + output_lines = [] + in_output = False + + for line in lines: + # Handle os>, PPL>, OS>, and > prompts (case-insensitive for os/ppl) + line_lower = line.lower() + if line_lower.startswith('os> ') or line_lower.startswith('ppl> ') or line.startswith('> '): + # Command line - remove prompt prefix and ; suffix + if line_lower.startswith('os> '): + cmd = line[4:].rstrip(';').strip() + elif line_lower.startswith('ppl> '): + cmd = line[5:].rstrip(';').strip() + else: # > + cmd = line[2:].rstrip(';').strip() + command_lines.append(cmd) + elif line.strip() == '': + # Blank line separates command from output + if command_lines and not in_output: + in_output = True + else: + # Output line + in_output = True + output_lines.append(line) + + # Format command with pipes on separate lines + if command_lines: + command = command_lines[0] + # Split on | and format nicely + if '|' in command: + parts = [p.strip() for p in command.split('|')] + command = parts[0] + '\n' + '\n'.join(f'| {p}' for p in parts[1:]) + else: + command = '' + + return command, '\n'.join(output_lines) + + +def split_multiple_queries(block_content: str) -> List[str]: + """Split a block with multiple os>/PPL>/OS> queries into separate blocks.""" + lines = block_content.strip().split('\n') + blocks = [] + current_block = [] + + for line in lines: + # Check if this is a new query prompt (case-insensitive) + line_lower = line.lower() + if (line_lower.startswith('os> ') or line_lower.startswith('ppl> ') or line.startswith('> ')) and current_block: + # Save the previous block + blocks.append('\n'.join(current_block)) + current_block = [line] + else: + current_block.append(line) + + # Don't forget the last block + if current_block: + blocks.append('\n'.join(current_block)) + + return blocks if len(blocks) > 1 else [block_content] + + +def convert_code_block_to_markdown(block_content: str) -> Optional[str]: + """Convert RST code block to Markdown code fences.""" + # Check if there are multiple queries in this block + query_blocks = split_multiple_queries(block_content) + + md_parts = [] + for query_block in query_blocks: + command, output = parse_os_block(query_block) + + if not command: + continue + + # Create Markdown code fences + md = f"```ppl\n{command}\n```\n" + + if output: + md += f"\nExpected output:\n\n```text\n{output}\n```" + + md_parts.append(md) + + return '\n\n'.join(md_parts) 
if md_parts else None + + +def convert_rst_to_markdown(rst_content: str) -> str: + """Convert RST content to Markdown.""" + lines = rst_content.split('\n') + md_lines = [] + i = 0 + skip_next = False + heading_map: Dict[str, int] = {} # Track underline char to heading level mapping + + while i < len(lines): + if skip_next: + skip_next = False + i += 1 + continue + + line = lines[i] + next_line = lines[i + 1] if i + 1 < len(lines) else None + prev_line = lines[i - 1] if i > 0 else None + + # Check for title with overline (e.g., ====\nTitle\n====) + if (prev_line and next_line and + re.match(r'^=+$', prev_line.strip()) and + re.match(r'^=+$', next_line.strip()) and + len(prev_line.strip()) == len(next_line.strip())): + # This is a title (H1) with overline + md_lines.append(f"# {line}\n") + skip_next = True + i += 1 + continue + + # Skip RST artifacts (standalone underlines) + if line.strip() and re.match(r'^[=\-\^~]+$', line.strip()): + i += 1 + continue + + # Check for headings (underline only) + heading = convert_heading(line, next_line, heading_map) + if heading: + md_lines.append(heading) + skip_next = True + i += 1 + continue + + # Check for RST grid tables (lines starting with +---+) + if line.strip().startswith('+') and '-' in line and '+' in line: + # Collect the entire table + table_lines = [] + j = i + while j < len(lines) and (lines[j].strip().startswith('+') or lines[j].strip().startswith('|')): + table_lines.append(lines[j]) + j += 1 + + if table_lines: + # Convert RST table to markdown + md_table = convert_rst_table_to_markdown(table_lines) + if md_table: + md_lines.append(md_table + '\n') + i = j + continue + + # Check for list-table directive + if line.strip().startswith('.. list-table::'): + # Extract table caption if present + caption = line.strip()[15:].strip() + i += 1 + + # Skip options (like :widths:, :header-rows:) + while i < len(lines) and lines[i].strip().startswith(':'): + i += 1 + + # Skip blank line + if i < len(lines) and lines[i].strip() == '': + i += 1 + + # Collect table rows (lines starting with * -) + table_rows = [] + current_row = [] + + while i < len(lines): + line_content = lines[i] + + # New row starts with * - + if line_content.strip().startswith('* -'): + if current_row: + table_rows.append(current_row) + current_row = [line_content.strip()[3:].strip()] + i += 1 + # Continuation of cell (starts with - or indented) + elif line_content.strip().startswith('- ') and current_row: + current_row.append(line_content.strip()[2:].strip()) + i += 1 + # End of table + elif line_content.strip() == '' or not (line_content.startswith(' ') or line_content.strip().startswith('-')): + if current_row: + table_rows.append(current_row) + break + else: + i += 1 + + # Convert to markdown table + if table_rows: + if caption: + md_lines.append(f"{caption}\n\n") + + # Header row + md_lines.append('| ' + ' | '.join(table_rows[0]) + ' |') + md_lines.append('| ' + ' | '.join(['---'] * len(table_rows[0])) + ' |') + + # Data rows + for row in table_rows[1:]: + md_lines.append('| ' + ' | '.join(row) + ' |') + + md_lines.append('\n') + continue + + # Check for image directive + if line.strip().startswith('.. image::'): + image_url = line.strip()[10:].strip() + # Use the URL as alt text (can be improved if there's a :alt: option) + md_lines.append(f'![Image]({image_url})\n') + i += 1 + # Skip any image options (like :alt:, :width:, etc.) 
+ while i < len(lines) and lines[i].strip().startswith(':'): + i += 1 + continue + + # Check for other RST directives to skip + if line.strip().startswith('..'): + # Skip directive and its options + while i < len(lines) and (lines[i].strip().startswith('..') or + lines[i].strip().startswith(':') or + lines[i].strip() == ''): + i += 1 + continue + + # Remove pipe prefix from description lines + if line.startswith('| '): + line = line[2:] + + # Convert links + line = convert_links(line) + + # Convert inline code + line = convert_inline_code(line) + + # Detect subsections (lines that look like subsection titles before code blocks) + if (i + 1 < len(lines) and + not line.startswith('#') and + line.strip() and + not line.strip().startswith('*') and + not line.strip().startswith('-') and + lines[i + 1].strip() and + not lines[i + 1].startswith(' ') and + len(line) < 80 and + (i + 2 < len(lines) and + (lines[i + 2].strip().startswith('The ') or + lines[i + 2].strip().startswith('This ')))): + # This looks like a subsection title + md_lines.append(f"### {line}\n") + i += 1 + continue + + # Check for RST directives + if line.strip().startswith('.. code-block::'): + # Extract language if present + match = re.match(r'\s*\.\. code-block::\s*(\w+)?', line) + lang = match.group(1) if match and match.group(1) else 'text' + + # Look ahead for indented block + j = i + 1 + # Skip blank line after directive + if j < len(lines) and lines[j].strip() == '': + j += 1 + + block_lines = [] + while j < len(lines) and (lines[j].startswith(' ') or lines[j].strip() == ''): + if lines[j].startswith(' '): + block_lines.append(lines[j][3:]) + else: + block_lines.append(lines[j]) + j += 1 + + if block_lines: + md_lines.append(f'```{lang}') + md_lines.extend(block_lines) + md_lines.append('```\n') + i = j + continue + + # Check for .. list-table:: directive + if line.strip().startswith('.. list-table::'): + # Skip the directive - tables need manual conversion + md_lines.append('**Table:**\n') + i += 1 + # Skip options and blank lines + while i < len(lines) and (lines[i].strip().startswith(':') or lines[i].strip() == ''): + i += 1 + continue + + # Check for .. note:: directive + if line.strip().startswith('.. 
note::'): + md_lines.append('> **Note:**') + i += 1 + # Get the note content (indented lines) + while i < len(lines) and (lines[i].startswith(' ') or lines[i].strip() == ''): + if lines[i].startswith(' '): + md_lines.append(f'> {lines[i][3:]}') + elif lines[i].strip(): + md_lines.append(f'> {lines[i]}') + else: + md_lines.append('>') + i += 1 + md_lines.append('') + continue + + # Check for code block marker (:: at end of line) + if line.strip().endswith('::'): + # Look ahead for indented block + j = i + 1 + # Skip blank line after :: + if j < len(lines) and lines[j].strip() == '': + j += 1 + + block_lines = [] + # Check for any indentation (tabs or spaces) + while j < len(lines) and (lines[j].startswith('\t') or lines[j].startswith(' ') or + lines[j].startswith(' ') or lines[j].startswith(' ') or + lines[j].startswith(' ') or lines[j].strip() == ''): + if lines[j].startswith('\t'): + block_lines.append(lines[j][1:]) + elif lines[j].startswith(' '): + block_lines.append(lines[j][4:]) + elif lines[j].startswith(' '): + block_lines.append(lines[j][3:]) + elif lines[j].startswith(' '): + block_lines.append(lines[j][2:]) + elif lines[j].startswith(' '): + block_lines.append(lines[j][1:]) + else: + block_lines.append(lines[j]) + j += 1 + + if block_lines: + block_content = '\n'.join(block_lines) + + # Check if it has os>, PPL>, or > prompts + if 'os>' in block_content or 'PPL>' in block_content or block_content.strip().startswith('>'): + md_block = convert_code_block_to_markdown(block_content) + if md_block: + # Add the description line before :: (if not "PPL query") + desc_line = line.rstrip(':').strip() + if desc_line and desc_line.lower() not in ['ppl query', 'query']: + md_lines.append(desc_line + '\n') + md_lines.append(md_block + '\n') + i = j + continue + else: + # Generic code block without prompts - wrap in markdown fence + desc_line = line.rstrip(':').strip() + if desc_line and desc_line.lower() not in ['example', 'result', 'result set']: + md_lines.append(desc_line + '\n') + md_lines.append('```bash\n' + block_content + '\n```\n') + i = j + continue + + # If no indented block found, just remove the :: + md_lines.append(line.rstrip(':').strip()) + i += 1 + continue + + # Regular line + if line.strip(): # Skip empty lines at the start + md_lines.append(line) + i += 1 + + return '\n'.join(md_lines) + + +def convert_file(rst_path: Path, md_path: Path) -> None: + """Convert a single RST file to Markdown.""" + print(f"Converting {rst_path} -> {md_path}") + + rst_content = rst_path.read_text(encoding='utf-8') + + # Convert + md_content = convert_rst_to_markdown(rst_content) + + # Write output + md_path.write_text(md_content, encoding='utf-8') + + print(f" ✓ Converted successfully") + + +def main() -> None: + if len(sys.argv) < 2: + print("Usage: python convert_rst_to_md.py [output_md_file]") + print(" or: python convert_rst_to_md.py --batch ") + sys.exit(1) + + if sys.argv[1] == '--batch': + # Batch convert all RST files in directory + directory = Path(sys.argv[2]) if len(sys.argv) > 2 else Path('docs/user/ppl/cmd') + + rst_files = list(directory.glob('*.rst')) + print(f"Found {len(rst_files)} RST files in {directory}") + + for rst_file in rst_files: + md_file = rst_file.with_suffix('.md') + try: + convert_file(rst_file, md_file) + except Exception as e: + print(f" ✗ Error: {e}") + else: + # Single file conversion + rst_file = Path(sys.argv[1]) + md_file = Path(sys.argv[2]) if len(sys.argv) > 2 else rst_file.with_suffix('.md') + + convert_file(rst_file, md_file) + + +if __name__ == 
diff --git a/scripts/docs_exporter/export_to_docs_website.py b/scripts/docs_exporter/export_to_docs_website.py new file mode 100755 index 00000000000..0b34984a006 --- /dev/null +++ b/scripts/docs_exporter/export_to_docs_website.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +""" +Minimal markdown exporter for OpenSearch SQL documentation. +Exports docs/user/ppl to ../documentation-website/_sql-and-ppl/ppl-reference/ +""" + +import re +from pathlib import Path +from typing import Optional + +def extract_title(content: str) -> Optional[str]: + """Extract title from first H1 heading or return None.""" + match = re.search(r'^#\s+(.+)$', content, re.MULTILINE) + return match.group(1).strip() if match else None + +def generate_frontmatter(title: Optional[str], parent: Optional[str] = None, nav_order: int = 1, has_children: bool = False) -> str: + """Generate Jekyll front-matter.""" + fm = ["---", "layout: default"] + if title: + fm.append(f"title: {title}") + if parent: + fm.append(f"parent: {parent}") + fm.append(f"nav_order: {nav_order}") + if has_children: + fm.append("has_children: true") + fm.append("---\n") + return "\n".join(fm) + +def process_file(source_file: Path, target_file: Path, parent: Optional[str] = None, nav_order: int = 1) -> None: + """Process a single markdown file.""" + content = source_file.read_text(encoding='utf-8') + + # Convert PPL code fences to SQL + content = re.sub(r'^```ppl\b.*$', '```sql', content, flags=re.MULTILINE) + + # Add copy buttons after code fences + content = re.sub(r'^```(bash|sh|sql)\b.*?\n(.*?)^```$', + r'```\1\n\2```\n{% include copy.html %}', + content, flags=re.MULTILINE | re.DOTALL) + + # Remove .md extension from relative links (keep http/https links unchanged) + content = re.sub(r'\]\((?!https?://)(.*?)\.md(#[^\)]*)?\)', r'](\1\2)', content) + + title = extract_title(content) or source_file.stem.replace('-', ' ').title() + + # Check if this directory has child markdown files in subdirectories + has_children = any(source_file.parent.glob('*/*.md')) + + frontmatter = generate_frontmatter(title, parent, nav_order, has_children) + + # Create target directory + target_file.parent.mkdir(parents=True, exist_ok=True) + + # Write file with front-matter + target_file.write_text(frontmatter + content, encoding='utf-8') + +def export_docs() -> None: + """Export PPL docs to documentation website.""" + source_dir = Path("../../docs/user/ppl") + target_dir = Path("../../../documentation-website/_sql-and-ppl/ppl-reference") + + if not source_dir.exists(): + print(f"Source directory {source_dir} not found") + return + + # Check if target directory exists and has files + if target_dir.exists() and any(target_dir.glob('**/*.md')): + response = input(f"Target directory {target_dir} contains files. Overwrite?
(y/n): ") + if response.lower() != 'y': + print("Export cancelled") + return + + # Get all markdown files + md_files = list(source_dir.glob('**/*.md')) + + for i, md_file in enumerate(md_files, 1): + # Calculate relative path from source + rel_path = md_file.relative_to(source_dir) + target_file = target_dir / rel_path + + # Determine parent based on directory structure + parent = ( + "SQL and PPL" + if rel_path.parent == Path(".") + else rel_path.parent.name.replace("-", " ").title() + ) + + process_file(md_file, target_file, parent, i) + print(f"Exported: {md_file} -> {target_file}") + + # Generate index.md for each directory + dirs = set(md_file.relative_to(source_dir).parent for md_file in md_files) + for dir_path in sorted(dirs): + if dir_path == Path("."): + continue + target_index = target_dir / dir_path / "index.md" + title = dir_path.name.replace("-", " ").title() + parent = "Opensearch Ppl Reference Manual" if dir_path.parent == Path(".") else dir_path.parent.name.replace("-", " ").title() + frontmatter = generate_frontmatter(title, parent, has_children=True) + target_index.write_text(frontmatter, encoding='utf-8') + print(f"Generated: {target_index}") + +if __name__ == "__main__": + export_docs() diff --git a/scripts/docs_exporter/fix_markdown_formatting.py b/scripts/docs_exporter/fix_markdown_formatting.py new file mode 100755 index 00000000000..b8dbc3a59b3 --- /dev/null +++ b/scripts/docs_exporter/fix_markdown_formatting.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +Comprehensive markdown formatting script for docs/user/ppl/**/*.md +- Adds proper spacing before code blocks and tables for Jekyll compatibility +- Adds double spaces after headers and list items for proper line breaks +- Adds blank lines after lists end +""" + +import re +from pathlib import Path +from typing import List + +def fix_markdown_formatting(content: str) -> str: + """Fix markdown formatting by adding proper spacing and line breaks.""" + lines = content.split('\n') + fixed_lines: List[str] = [] + in_code_block = False + in_table = False + + for i, line in enumerate(lines): + # Check if current line is a code block start/end + is_code_block_marker = line.startswith('```') + + # Check if current line is any table line (including separator rows) + is_table_line = line.strip().startswith('|') and line.strip().endswith('|') + + # Check if it's a data row (not a separator) + is_table_separator = re.match(r'^\s*\|[\s\-\|:]*\|\s*$', line) + is_table_row = is_table_line and not is_table_separator + + # Get previous and next lines for context + prev_line = lines[i-1] if i > 0 else '' + next_line = lines[i+1] if i+1 < len(lines) else '' + + # Convert blank lines after code blocks/tables to double-space lines + if not line.strip() and prev_line.strip(): + prev_is_code_end = prev_line.startswith('```') + prev_is_table_end = (prev_line.strip().startswith('|') and prev_line.strip().endswith('|') and + not re.match(r'^\s*\|[\s\-\|:]*\|\s*$', prev_line)) + if prev_is_code_end or (prev_is_table_end and not in_table): + line = ' ' + + # Add spacing BEFORE code blocks and tables (check BEFORE updating in_table) + if not in_code_block: + # Opening code block + if is_code_block_marker: + if fixed_lines and fixed_lines[-1].strip(): + fixed_lines.append(' ') + elif fixed_lines and not fixed_lines[-1].strip(): + fixed_lines[-1] = ' ' + + # Starting table (first table row) + elif is_table_row and not in_table: + if fixed_lines and fixed_lines[-1].strip(): + fixed_lines.append(' ') + elif fixed_lines and not 
fixed_lines[-1].strip(): + fixed_lines[-1] = ' ' + + # Detect table start/end (AFTER spacing logic) - use is_table_line to include separators + if not in_code_block: + if is_table_line and not in_table: + in_table = True + elif not is_table_line and in_table: + in_table = False + + # Process line for double spaces (only outside code blocks) + if not in_code_block and not is_code_block_marker and line.strip(): + # Add double spaces to headers and list items if not already present + is_header = line.startswith('#') + is_list = (line.strip().startswith('* ') or + line.strip().startswith('- ') or + line.strip().startswith('+ ') or + re.match(r'^\s*\d+\.\s', line)) + + if (is_header or is_list) and not line.endswith(' '): + line = line + ' ' + + fixed_lines.append(line) + + # Update code block state AFTER processing the line + if is_code_block_marker: + in_code_block = not in_code_block + + # Add spacing AFTER code blocks and tables + if not in_code_block: + # Closing code block + if is_code_block_marker and next_line.strip(): + if i+1 < len(lines) and not lines[i+1].strip(): + pass # Will convert blank line when we reach it + else: + fixed_lines.append(' ') + + # Ending table (last table row before non-table content) + elif is_table_row and in_table and next_line.strip() and not (next_line.strip().startswith('|') and next_line.strip().endswith('|')): + if i+1 < len(lines) and not lines[i+1].strip(): + pass # Will convert blank line when we reach it + else: + fixed_lines.append(' ') + + # Add blank line after list ends (only outside code blocks and tables) + if not in_code_block and not is_code_block_marker and not in_table: + current_is_list = (line.strip().startswith('* ') or + line.strip().startswith('- ') or + line.strip().startswith('+ ') or + re.match(r'^\s*\d+\.\s', line)) + next_is_not_list = (next_line.strip() and + not next_line.strip().startswith('* ') and + not next_line.strip().startswith('- ') and + not next_line.strip().startswith('+') and + not re.match(r'^\s*\d+\.\s', next_line) and + not next_line.strip().startswith(' ')) # Not indented continuation + + # Add blank line after list ends (with double spaces) + if current_is_list and next_is_not_list: + fixed_lines.append(' ') + + return '\n'.join(fixed_lines) + +def process_file(file_path: Path) -> bool: + """Process a single markdown file.""" + content = file_path.read_text(encoding='utf-8') + + fixed_content = fix_markdown_formatting(content) + + # Only write if content changed + if fixed_content != content: + file_path.write_text(fixed_content, encoding='utf-8') + print(f"Fixed: {file_path}") + return True + return False + +def main() -> None: + """Fix all markdown files in docs/user/ppl/""" + # Get the directory where this script is located + script_dir = Path(__file__).parent + + # Define path relative to the script location + source_dir = script_dir / "../../docs/user/ppl" + + if not source_dir.exists(): + print(f"Source directory {source_dir} not found") + return + + print("Fixing markdown formatting:") + print("- Adding double-space lines above and below code blocks") + print("- Adding double-space lines above and below tables") + print("- Adding double spaces after headers and list items") + print("- Adding blank lines after lists") + print() + + md_files = list(source_dir.glob('**/*.md')) + fixed_count = 0 + + for md_file in md_files: + if process_file(md_file): + fixed_count += 1 + + print(f"\nProcessed {len(md_files)} files, fixed {fixed_count} files") + +if __name__ == "__main__": + main() From 
b34e374972dbea0b02c259f4f1f436d20689984c Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 9 Dec 2025 17:16:55 -0800 Subject: [PATCH 99/99] merged main branch Signed-off-by: Asif Bashar --- docs/user/ppl/functions/conversion.md | 68 ++++++- docs/user/ppl/functions/conversion.rst | 263 ------------------------- 2 files changed, 67 insertions(+), 264 deletions(-) delete mode 100644 docs/user/ppl/functions/conversion.rst diff --git a/docs/user/ppl/functions/conversion.md b/docs/user/ppl/functions/conversion.md index a33a93bbd69..200fe84e1aa 100644 --- a/docs/user/ppl/functions/conversion.md +++ b/docs/user/ppl/functions/conversion.md @@ -269,4 +269,70 @@ fetched rows / total rows = 1/1 | TRUE | +-------------+ ``` - \ No newline at end of file + +## TONUMBER + +### Description + +The following usage options are available, depending on the parameter +types and the number of parameters. + +Usage: tonumber(string, \[base\]) converts the value in the first +argument to a number. The second argument describes the base of the +first argument. If the second argument is not provided, the value is +converted to its base-10 number representation. + +Return type: Number + +You can use this function with the eval command and as part of eval +expressions. Base values can be between 2 and 36. The maximum value +supported for base 10 is +(2-2\^-52)·2\^1023 and minimum is +-(2-2\^-52)·2\^1023. The maximum for other supported bases is 2\^63-1 +(or 7FFFFFFFFFFFFFFF) and minimum is -2\^63 (or -7FFFFFFFFFFFFFFF). If +the tonumber function cannot parse a field value to a number, the +function returns NULL. You can use this function to convert a string +representation of a binary number into the corresponding number in +base 10. + +The following example converts a binary string to its number +representation: + + os> source=people | eval int_value = tonumber('010101',2) | fields int_value | head 1 + fetched rows / total rows = 1/1 + +-----------+ + | int_value | + |-----------| + | 21.0 | + +-----------+ + +The following example converts a hexadecimal string to its number representation: + + os> source=people | eval int_value = tonumber('FA34',16) | fields int_value | head 1 + fetched rows / total rows = 1/1 + +-----------+ + | int_value | + |-----------| + | 64052.0 | + +-----------+ + +The following example converts a decimal string to its number +representation: + + os> source=people | eval int_value = tonumber('4598') | fields int_value | head 1 + fetched rows / total rows = 1/1 + +-----------+ + | int_value | + |-----------| + | 4598.0 | + +-----------+ + +The following example converts a decimal string with a fractional part +to its number representation: + + os> source=people | eval double_value = tonumber('4598.678') | fields double_value | head 1 + fetched rows / total rows = 1/1 + +--------------+ + | double_value | + |--------------| + | 4598.678 | + +--------------+ diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst deleted file mode 100644 index 93eee712d94..00000000000 --- a/docs/user/ppl/functions/conversion.rst +++ /dev/null @@ -1,263 +0,0 @@ -========================= -Type Conversion Functions -========================= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -CAST ---- - -Description ->>>>>>>>>>> - -Usage: cast(expr as dateType) cast the expr to dataType. return the value of dataType.
The following conversion rules are used: - -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| Src/Target | STRING | NUMBER | BOOLEAN | TIMESTAMP | DATE | TIME | IP | -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| STRING | | Note1 | Note1 | TIMESTAMP() | DATE() | TIME() | IP() | -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| NUMBER | Note1 | | v!=0 | N/A | N/A | N/A | N/A | -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| BOOLEAN | Note1 | v?1:0 | | N/A | N/A | N/A | N/A | -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| TIMESTAMP | Note1 | N/A | N/A | | DATE() | TIME() | N/A | -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| DATE | Note1 | N/A | N/A | N/A | | N/A | N/A | -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| TIME | Note1 | N/A | N/A | N/A | N/A | | N/A | -+------------+--------+--------+---------+-------------+--------+--------+--------+ -| IP | Note2 | N/A | N/A | N/A | N/A | N/A | | -+------------+--------+--------+---------+-------------+--------+--------+--------+ - -Note1: the conversion follow the JDK specification. - -Note2: IP will be converted to its canonical representation. Canonical representation -for IPv6 is described in `RFC 5952 `_. - -Cast to string example:: - - os> source=people | eval `cbool` = CAST(true as string), `cint` = CAST(1 as string), `cdate` = CAST(CAST('2012-08-07' as date) as string) | fields `cbool`, `cint`, `cdate` - fetched rows / total rows = 1/1 - +-------+------+------------+ - | cbool | cint | cdate | - |-------+------+------------| - | TRUE | 1 | 2012-08-07 | - +-------+------+------------+ - -Cast to number example:: - - os> source=people | eval `cbool` = CAST(true as int), `cstring` = CAST('1' as int) | fields `cbool`, `cstring` - fetched rows / total rows = 1/1 - +-------+---------+ - | cbool | cstring | - |-------+---------| - | 1 | 1 | - +-------+---------+ - -Cast to date example:: - - os> source=people | eval `cdate` = CAST('2012-08-07' as date), `ctime` = CAST('01:01:01' as time), `ctimestamp` = CAST('2012-08-07 01:01:01' as timestamp) | fields `cdate`, `ctime`, `ctimestamp` - fetched rows / total rows = 1/1 - +------------+----------+---------------------+ - | cdate | ctime | ctimestamp | - |------------+----------+---------------------| - | 2012-08-07 | 01:01:01 | 2012-08-07 01:01:01 | - +------------+----------+---------------------+ - -Cast function can be chained:: - - os> source=people | eval `cbool` = CAST(CAST(true as string) as boolean) | fields `cbool` - fetched rows / total rows = 1/1 - +-------+ - | cbool | - |-------| - | True | - +-------+ - - -IMPLICIT (AUTO) TYPE CONVERSION -------------------------------- - -Implicit conversion is automatic casting. When a function does not have an exact match for the -input types, the engine looks for another signature that can safely work with the values. It picks -the option that requires the least stretching of the original types, so you can mix literals and -fields without adding ``CAST`` everywhere. - -String to numeric ->>>>>>>>>>>>>>>>> - -When a string stands in for a number we simply parse the text: - -- The value must be something like ``"3.14"`` or ``"42"``. Anything else causes the query to fail. 
-- If a string appears next to numeric arguments, it is treated as a ``DOUBLE`` so the numeric - overload of the function can run. - -Use string in arithmetic operator example :: - - os> source=people | eval divide="5"/10, multiply="5" * 10, add="5" + 10, minus="5" - 10, concat="5" + "5" | fields divide, multiply, add, minus, concat - fetched rows / total rows = 1/1 - +--------+----------+------+-------+--------+ - | divide | multiply | add | minus | concat | - |--------+----------+------+-------+--------| - | 0.5 | 50.0 | 15.0 | -5.0 | 55 | - +--------+----------+------+-------+--------+ - -Use string in comparison operator example :: - - os> source=people | eval e="1000"==1000, en="1000"!=1000, ed="1000"==1000.0, edn="1000"!=1000.0, l="1000">999, ld="1000">999.9, i="malformed"==1000 | fields e, en, ed, edn, l, ld, i - fetched rows / total rows = 1/1 - +------+-------+------+-------+------+------+------+ - | e | en | ed | edn | l | ld | i | - |------+-------+------+-------+------+------+------| - | True | False | True | False | True | True | null | - +------+-------+------+-------+------+------+------+ - - - -TONUMBER --------- - -Description ->>>>>>>>>>> - -The following usage options are available, depending on the parameter types and the number of parameters. - -Usage: tonumber(string, [base]) converts the value in first argument. The second argument describe the base of first argument. If second argument is not provided, then it converts to base 10 number representation. - -Return type: Number - - -You can use this function with the eval commands and as part of eval expressions. -Base values can be between 2 and 36. The maximum value supported for base 10 is +(2-2^-52)·2^1023 and minimum is -(2-2^-52)·2^1023. -The maximum for other supported bases is 2^63-1 (or 7FFFFFFFFFFFFFFF) and minimum is -2^63 (or -7FFFFFFFFFFFFFFF). -If the tonumber function cannot parse a field value to a number, the function returns NULL. -You can use this function to convert a string representation of a binary number to return the corresponding number in base 10. - -Following example converts a string in binary to the number representation:: - - os> source=people | eval int_value = tonumber('010101',2) | fields int_value | head 1 - fetched rows / total rows = 1/1 - +-----------+ - | int_value | - |-----------| - | 21.0 | - +-----------+ - - -Following example converts a string in hex to the number representation:: - - os> source=people | eval int_value = tonumber('FA34',16) | fields int_value | head 1 - fetched rows / total rows = 1/1 - +-----------+ - | int_value | - |-----------| - | 64052.0 | - +-----------+ - -Following example converts a string in decimal to the number representation:: - - os> source=people | eval int_value = tonumber('4598') | fields int_value | head 1 - fetched rows / total rows = 1/1 - +-----------+ - | int_value | - |-----------| - | 4598.0 | - +-----------+ - -Following example converts a string in decimal with fraction to the number representation:: - - os> source=people | eval double_value = tonumber('4598.678') | fields double_value | head 1 - fetched rows / total rows = 1/1 - +--------------+ - | double_value | - |--------------| - | 4598.678 | - +--------------+ - -TOSTRING ------------ - -Description ->>>>>>>>>>> -The following usage options are available, depending on the parameter types and the number of parameters. - -Usage with format type: tostring(ANY, [format]): Converts the value in first argument to provided format type string in second argument. 
If second argument is not provided, then it converts to default string representation. -Return type: string - -Usage for boolean parameter without format type tostring(boolean): Converts the string to 'TRUE' or 'FALSE'. -Return type: string - -You can use this function with the eval commands and as part of eval expressions. If first argument can be any valid type , second argument is optional and if provided , it needs to be format name to convert to where first argument contains only numbers. If first argument is boolean, then second argument is not used even if its provided. - -Format types: - -a) "binary" Converts a number to a binary value. -b) "hex" Converts the number to a hexadecimal value. -c) "commas" Formats the number with commas. If the number includes a decimal, the function rounds the number to nearest two decimal places. -d) "duration" Converts the value in seconds to the readable time format HH:MM:SS. -e) "duration_millis" Converts the value in milliseconds to the readable time format HH:MM:SS. - -The format argument is optional and is only used when the value argument is a number. The tostring function supports the following formats. - -Basic examples: - -You can use this function to convert a number to a string of its binary representation. -Example:: -city, city.name, city.location.latitude - os> source=accounts | where firstname = "Amber" | eval balance_binary = tostring(balance, "binary") | fields firstname, balance_binary, balance - fetched rows / total rows = 1/1 - +-----------+------------------+---------+ - | firstname | balance_binary | balance | - |-----------+------------------+---------| - | Amber | 1001100100111001 | 39225 | - +-----------+------------------+---------+ - - -You can use this function to convert a number to a string of its hex representation. -Example:: - - os> source=accounts | where firstname = "Amber" | eval balance_hex = tostring(balance, "hex") | fields firstname, balance_hex, balance - fetched rows / total rows = 1/1 - +-----------+-------------+---------+ - | firstname | balance_hex | balance | - |-----------+-------------+---------| - | Amber | 9939 | 39225 | - +-----------+-------------+---------+ - -The following example formats the column totalSales to display values with commas. -Example:: - - os> source=accounts | where firstname = "Amber" | eval balance_commas = tostring(balance, "commas") | fields firstname, balance_commas, balance - fetched rows / total rows = 1/1 - +-----------+----------------+---------+ - | firstname | balance_commas | balance | - |-----------+----------------+---------| - | Amber | 39,225 | 39225 | - +-----------+----------------+---------+ - -The following example converts number of seconds to HH:MM:SS format representing hours, minutes and seconds. -Example:: - - os> source=accounts | where firstname = "Amber" | eval duration = tostring(6500, "duration") | fields firstname, duration - fetched rows / total rows = 1/1 - +-----------+----------+ - | firstname | duration | - |-----------+----------| - | Amber | 01:48:20 | - +-----------+----------+ - -The following example for converts boolean parameter to string. -Example:: - - os> source=accounts | where firstname = "Amber"| eval `boolean_str` = tostring(1=1)| fields `boolean_str` - fetched rows / total rows = 1/1 - +-------------+ - | boolean_str | - |-------------| - | TRUE | - +-------------+
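One gap worth noting: the tostring format list above documents "duration_millis", but neither the removed RST examples nor the new Markdown ones exercise it. A hedged sketch of the expected behavior, assuming the same accounts index as the other examples; since 6,500,000 milliseconds is 6,500 seconds, the output should match the "duration" example (the result shown is what the HH:MM:SS description implies, not an engine-verified transcript):

```
os> source=accounts | where firstname = "Amber" | eval duration_ms = tostring(6500000, "duration_millis") | fields firstname, duration_ms
fetched rows / total rows = 1/1
+-----------+-------------+
| firstname | duration_ms |
|-----------+-------------|
| Amber     | 01:48:20    |
+-----------+-------------+
```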