diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d02276a5be6..9a772251ec5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -691,7 +691,19 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { context.relBuilder.field(node.getAlias()), context.relBuilder.field(PatternUtils.SAMPLE_LOGS)); flattenParsedPattern(node.getAlias(), parsedNode, context, false); - context.relBuilder.projectExcept(context.relBuilder.field(PatternUtils.SAMPLE_LOGS)); + // Reorder fields for consistency with Brain's output + projectPlusOverriding( + List.of( + context.relBuilder.field(node.getAlias()), + context.relBuilder.field(PatternUtils.PATTERN_COUNT), + context.relBuilder.field(PatternUtils.TOKENS), + context.relBuilder.field(PatternUtils.SAMPLE_LOGS)), + List.of( + node.getAlias(), + PatternUtils.PATTERN_COUNT, + PatternUtils.TOKENS, + PatternUtils.SAMPLE_LOGS), + context); } else { RexNode parsedNode = PPLFuncImpTable.INSTANCE.resolve( @@ -2258,7 +2270,7 @@ private void flattenParsedPattern( String originalPatternResultAlias, RexNode parsedNode, CalcitePlanContext context, - boolean flattenPatternCount) { + boolean flattenPatternAggResult) { List fattenedNodes = new ArrayList<>(); List projectNames = new ArrayList<>(); // Flatten map struct fields @@ -2274,7 +2286,7 @@ private void flattenParsedPattern( true); fattenedNodes.add(context.relBuilder.alias(patternExpr, originalPatternResultAlias)); projectNames.add(originalPatternResultAlias); - if (flattenPatternCount) { + if (flattenPatternAggResult) { RexNode patternCountExpr = context.rexBuilder.makeCast( context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT), @@ -2300,6 +2312,24 @@ private void flattenParsedPattern( true); fattenedNodes.add(context.relBuilder.alias(tokensExpr, PatternUtils.TOKENS)); projectNames.add(PatternUtils.TOKENS); + if (flattenPatternAggResult) { + RexNode sampleLogsExpr = + context.rexBuilder.makeCast( + context + .rexBuilder + .getTypeFactory() + .createArrayType( + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), -1), + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.INTERNAL_ITEM, + parsedNode, + context.rexBuilder.makeLiteral(PatternUtils.SAMPLE_LOGS)), + true, + true); + fattenedNodes.add(context.relBuilder.alias(sampleLogsExpr, PatternUtils.SAMPLE_LOGS)); + projectNames.add(PatternUtils.SAMPLE_LOGS); + } projectPlusOverriding(fattenedNodes, projectNames, context); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java index 5ea04d389e3..576b105bdc5 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java +++ b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/LogPatternAggFunction.java @@ -184,7 +184,8 @@ public Object value(Object... argList) { PatternUtils.PATTERN, parseResult.toTokenOrderString(PatternUtils.WILDCARD_PREFIX), PatternUtils.PATTERN_COUNT, count, - PatternUtils.TOKENS, tokensMap); + PatternUtils.TOKENS, tokensMap, + PatternUtils.SAMPLE_LOGS, sampleLogs); }) .collect(Collectors.toList()); } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java index dcb2684d537..c9d1c0b158c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLPatternsIT.java @@ -88,19 +88,22 @@ public void testSimplePatternAggregationMode() throws IOException { result, schema("pattern_count", "bigint"), schema("patterns_field", "string"), - schema("tokens", "struct")); + schema("tokens", "struct"), + schema("sample_logs", "array")); verifyDataRows( result, rows( - 7, "@.", + 7, ImmutableMap.of( "", ImmutableList.of("amberduke", "hattiebond", "nanettebates"), "", ImmutableList.of("pyrami", "netagy", "quility"), "", - ImmutableList.of("com", "com", "com")))); + ImmutableList.of("com", "com", "com")), + ImmutableList.of( + "amberduke@pyrami.com", "hattiebond@netagy.com", "nanettebates@quility.com"))); } @Test @@ -168,7 +171,8 @@ public void testBrainAggregationMode() throws IOException { result, schema("patterns_field", "string"), schema("pattern_count", "bigint"), - schema("tokens", "struct")); + schema("tokens", "struct"), + schema("sample_logs", "array")); verifyDataRows( result, rows( @@ -178,7 +182,10 @@ public void testBrainAggregationMode() throws IOException { "", ImmutableList.of("for", "for"), "", - ImmutableList.of("-1547954353065580372", "6996194389878584395"))), + ImmutableList.of("-1547954353065580372", "6996194389878584395")), + ImmutableList.of( + "Verification succeeded for blk_-1547954353065580372", + "Verification succeeded for blk_6996194389878584395")), rows( "BLOCK* NameSystem.addStoredBlock: blockMap updated: is added to blk_" + " size ", @@ -189,7 +196,12 @@ public void testBrainAggregationMode() throws IOException { "", ImmutableList.of("67108864", "67108864"), "", - ImmutableList.of("-7017553867379051457", "-3249711809227781266"))), + ImmutableList.of("-7017553867379051457", "-3249711809227781266")), + ImmutableList.of( + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to" + + " blk_-7017553867379051457 size 67108864", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added" + + " to blk_-3249711809227781266 size 67108864")), rows( " NameSystem.allocateBlock:" + " /user/root/sortrand/_temporary/_task___r__/part" @@ -209,7 +221,14 @@ public void testBrainAggregationMode() throws IOException { "", ImmutableList.of("0002", "0002"), "", - ImmutableList.of("200811092030", "200811092030"))), + ImmutableList.of("200811092030", "200811092030")), + ImmutableList.of( + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000296_0/part-00296." + + " blk_-6620182933895093708", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000318_0/part-00318." + + " blk_2096692261399680562")), rows( "PacketResponder failed blk_", 2, @@ -217,7 +236,10 @@ public void testBrainAggregationMode() throws IOException { "", ImmutableList.of("for", "for"), "", - ImmutableList.of("6996194389878584395", "-1547954353065580372")))); + ImmutableList.of("6996194389878584395", "-1547954353065580372")), + ImmutableList.of( + "PacketResponder failed for blk_6996194389878584395", + "PacketResponder failed for blk_-1547954353065580372"))); } @Test @@ -229,12 +251,14 @@ public void testBrainAggregationModeWithGroupByClause() throws IOException { + " mode=aggregation max_sample_count=5" + " variable_count_threshold=2 frequency_threshold_percentage=0.2", TEST_INDEX_HDFS_LOGS)); + System.out.println(result); verifySchema( result, schema("level", "string"), schema("patterns_field", "string"), schema("pattern_count", "bigint"), - schema("tokens", "struct")); + schema("tokens", "struct"), + schema("sample_logs", "array")); verifyDataRows( result, rows( @@ -242,7 +266,10 @@ public void testBrainAggregationModeWithGroupByClause() throws IOException { "Verification succeeded for blk_", 2, ImmutableMap.of( - "", ImmutableList.of("-1547954353065580372", "6996194389878584395"))), + "", ImmutableList.of("-1547954353065580372", "6996194389878584395")), + ImmutableList.of( + "Verification succeeded for blk_-1547954353065580372", + "Verification succeeded for blk_6996194389878584395")), rows( "INFO", "BLOCK* NameSystem.addStoredBlock: blockMap updated: is added to blk_" @@ -254,7 +281,12 @@ public void testBrainAggregationModeWithGroupByClause() throws IOException { "", ImmutableList.of("67108864", "67108864"), "", - ImmutableList.of("-7017553867379051457", "-3249711809227781266"))), + ImmutableList.of("-7017553867379051457", "-3249711809227781266")), + ImmutableList.of( + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to" + + " blk_-7017553867379051457 size 67108864", + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added" + + " to blk_-3249711809227781266 size 67108864")), rows( "INFO", "BLOCK* NameSystem.allocateBlock:" @@ -273,13 +305,23 @@ public void testBrainAggregationModeWithGroupByClause() throws IOException { "", ImmutableList.of("000296", "000318"), "", - ImmutableList.of("0002", "0002"))), + ImmutableList.of("0002", "0002")), + ImmutableList.of( + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000296_0/part-00296." + + " blk_-6620182933895093708", + "BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000318_0/part-00318." + + " blk_2096692261399680562")), rows( "WARN", "PacketResponder failed for blk_", 2, ImmutableMap.of( - "", ImmutableList.of("6996194389878584395", "-1547954353065580372")))); + "", ImmutableList.of("6996194389878584395", "-1547954353065580372")), + ImmutableList.of( + "PacketResponder failed for blk_6996194389878584395", + "PacketResponder failed for blk_-1547954353065580372"))); } @Test diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.json index 36b6b90b7d1..c3fc80d1eb0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_brain_agg_push.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(patterns_field=[SAFE_CAST(ITEM($1, 'pattern'))], pattern_count=[SAFE_CAST(ITEM($1, 'pattern_count'))], tokens=[SAFE_CAST(ITEM($1, 'tokens'))])\n LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n LogicalAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n LogicalProject(email=[$9], $f17=[10], $f18=[100000])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n Uncollect\n LogicalProject(patterns_field=[$cor0.patterns_field])\n LogicalValues(tuples=[[{ 0 }]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['pattern'], expr#3=[ITEM($t1, $t2)], expr#4=[SAFE_CAST($t3)], expr#5=['pattern_count'], expr#6=[ITEM($t1, $t5)], expr#7=[SAFE_CAST($t6)], expr#8=['tokens'], expr#9=[ITEM($t1, $t8)], expr#10=[SAFE_CAST($t9)], patterns_field=[$t4], pattern_count=[$t7], tokens=[$t10])\n EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n EnumerableAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[10], expr#2=[100000], proj#0..2=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[email]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"email\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableUncollect\n EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.patterns_field], patterns_field=[$t2])\n EnumerableValues(tuples=[[{ 0 }]])\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(patterns_field=[SAFE_CAST(ITEM($1, 'pattern'))], pattern_count=[SAFE_CAST(ITEM($1, 'pattern_count'))], tokens=[SAFE_CAST(ITEM($1, 'tokens'))], sample_logs=[SAFE_CAST(ITEM($1, 'sample_logs'))])\n LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n LogicalAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n LogicalProject(email=[$9], $f17=[10], $f18=[100000])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n Uncollect\n LogicalProject(patterns_field=[$cor0.patterns_field])\n LogicalValues(tuples=[[{ 0 }]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['pattern'], expr#3=[ITEM($t1, $t2)], expr#4=[SAFE_CAST($t3)], expr#5=['pattern_count'], expr#6=[ITEM($t1, $t5)], expr#7=[SAFE_CAST($t6)], expr#8=['tokens'], expr#9=[ITEM($t1, $t8)], expr#10=[SAFE_CAST($t9)], expr#11=['sample_logs'], expr#12=[ITEM($t1, $t11)], expr#13=[SAFE_CAST($t12)], patterns_field=[$t4], pattern_count=[$t7], tokens=[$t10], sample_logs=[$t13])\n EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n EnumerableAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n EnumerableCalc(expr#0=[{inputs}], expr#1=[10], expr#2=[100000], proj#0..2=[{exprs}])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[email]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"email\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n EnumerableUncollect\n EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.patterns_field], patterns_field=[$t2])\n EnumerableValues(tuples=[[{ 0 }]])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.json b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.json index 196dce8fecf..2716bfe9e83 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(pattern_count=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'pattern'))], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))])\n LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n LogicalProject(email=[$9], patterns_field=[REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>')], $f18=[10])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t1, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], pattern_count=[$t0], $f1=[$t6], $f2=[$t9])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000, PROJECT->[pattern_count, patterns_field, sample_logs]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"patterns_field\":{\"terms\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGy3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBx3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiA5LAogICAgICAibmFtZSI6ICIkOSIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIlthLXpBLVowLTldKyIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICI8Kj4iLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMwogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAReHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABx4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB4AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABJ+cQB+AAt0AAZTVFJJTkd+cQB+ABh0AAdLZXl3b3JkcQB+AB14dAAHYWRkcmVzc3NxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgANdAAGZ2VuZGVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAEY2l0eXNxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAJxAH4AInEAfgAjeHQACGVtcGxveWVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAFc3RhdGVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h0AANhZ2VxAH4ADXQABWVtYWlsc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAIbGFzdG5hbWVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h4AHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"pattern_count\":{\"value_count\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGy3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBx3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiA5LAogICAgICAibmFtZSI6ICIkOSIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIlthLXpBLVowLTldKyIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICI8Kj4iLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMwogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAReHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABx4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB4AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABJ+cQB+AAt0AAZTVFJJTkd+cQB+ABh0AAdLZXl3b3JkcQB+AB14dAAHYWRkcmVzc3NxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgANdAAGZ2VuZGVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAEY2l0eXNxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAJxAH4AInEAfgAjeHQACGVtcGxveWVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAFc3RhdGVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h0AANhZ2VxAH4ADXQABWVtYWlsc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAIbGFzdG5hbWVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h4AHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}}}},\"sample_logs\":{\"top_hits\":{\"from\":0,\"size\":10,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"email\"],\"excludes\":[]}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'pattern'))], pattern_count=[$1], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))], sample_logs=[$2])\n LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n LogicalProject(email=[$9], patterns_field=[REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>')], $f18=[10])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t1)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], $f0=[$t6], pattern_count=[$t2], $f2=[$t9], sample_logs=[$t1])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000, PROJECT->[patterns_field, sample_logs, pattern_count]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"patterns_field\":{\"terms\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGy3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBx3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiA5LAogICAgICAibmFtZSI6ICIkOSIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIlthLXpBLVowLTldKyIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICI8Kj4iLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMwogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAReHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABx4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB4AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABJ+cQB+AAt0AAZTVFJJTkd+cQB+ABh0AAdLZXl3b3JkcQB+AB14dAAHYWRkcmVzc3NxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgANdAAGZ2VuZGVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAEY2l0eXNxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAJxAH4AInEAfgAjeHQACGVtcGxveWVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAFc3RhdGVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h0AANhZ2VxAH4ADXQABWVtYWlsc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAIbGFzdG5hbWVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h4AHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"pattern_count\":{\"value_count\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGy3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBx3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfUkVQTEFDRSIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiaW5wdXQiOiA5LAogICAgICAibmFtZSI6ICIkOSIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIlthLXpBLVowLTldKyIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICI8Kj4iLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogMwogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAReHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABx4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB4AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABJ+cQB+AAt0AAZTVFJJTkd+cQB+ABh0AAdLZXl3b3JkcQB+AB14dAAHYWRkcmVzc3NxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgANdAAGZ2VuZGVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAEY2l0eXNxAH4AEHEAfgAWcQB+ABlxAH4AHXNxAH4AAAAAAAN3BAAAAAJxAH4AInEAfgAjeHQACGVtcGxveWVyc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAFc3RhdGVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h0AANhZ2VxAH4ADXQABWVtYWlsc3EAfgAQcQB+ABZxAH4AGXEAfgAdc3EAfgAAAAAAA3cEAAAAAnEAfgAicQB+ACN4dAAIbGFzdG5hbWVzcQB+ABBxAH4AFnEAfgAZcQB+AB1zcQB+AAAAAAADdwQAAAACcQB+ACJxAH4AI3h4AHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}}}},\"sample_logs\":{\"top_hits\":{\"from\":0,\"size\":10,\"version\":false,\"seq_no_primary_term\":false,\"explain\":false,\"_source\":{\"includes\":[\"email\"],\"excludes\":[]}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_brain_agg_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_brain_agg_push.json index 6389c6dbc70..bd4d36474e2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_brain_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_brain_agg_push.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(patterns_field=[SAFE_CAST(ITEM($1, 'pattern'))], pattern_count=[SAFE_CAST(ITEM($1, 'pattern_count'))], tokens=[SAFE_CAST(ITEM($1, 'tokens'))])\n LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n LogicalAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n LogicalProject(email=[$9], $f17=[10], $f18=[100000])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n Uncollect\n LogicalProject(patterns_field=[$cor0.patterns_field])\n LogicalValues(tuples=[[{ 0 }]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['pattern'], expr#3=[ITEM($t1, $t2)], expr#4=[SAFE_CAST($t3)], expr#5=['pattern_count'], expr#6=[ITEM($t1, $t5)], expr#7=[SAFE_CAST($t6)], expr#8=['tokens'], expr#9=[ITEM($t1, $t8)], expr#10=[SAFE_CAST($t9)], patterns_field=[$t4], pattern_count=[$t7], tokens=[$t10])\n EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n EnumerableAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[10], expr#18=[100000], email=[$t9], $f17=[$t17], $f18=[$t18])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n EnumerableUncollect\n EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.patterns_field], patterns_field=[$t2])\n EnumerableValues(tuples=[[{ 0 }]])\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(patterns_field=[SAFE_CAST(ITEM($1, 'pattern'))], pattern_count=[SAFE_CAST(ITEM($1, 'pattern_count'))], tokens=[SAFE_CAST(ITEM($1, 'tokens'))], sample_logs=[SAFE_CAST(ITEM($1, 'sample_logs'))])\n LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n LogicalAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n LogicalProject(email=[$9], $f17=[10], $f18=[100000])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n Uncollect\n LogicalProject(patterns_field=[$cor0.patterns_field])\n LogicalValues(tuples=[[{ 0 }]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=['pattern'], expr#3=[ITEM($t1, $t2)], expr#4=[SAFE_CAST($t3)], expr#5=['pattern_count'], expr#6=[ITEM($t1, $t5)], expr#7=[SAFE_CAST($t6)], expr#8=['tokens'], expr#9=[ITEM($t1, $t8)], expr#10=[SAFE_CAST($t9)], expr#11=['sample_logs'], expr#12=[ITEM($t1, $t11)], expr#13=[SAFE_CAST($t12)], patterns_field=[$t4], pattern_count=[$t7], tokens=[$t10], sample_logs=[$t13])\n EnumerableCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n EnumerableAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=[10], expr#18=[100000], email=[$t9], $f17=[$t17], $f18=[$t18])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n EnumerableUncollect\n EnumerableCalc(expr#0=[{inputs}], expr#1=[$cor0], expr#2=[$t1.patterns_field], patterns_field=[$t2])\n EnumerableValues(tuples=[[{ 0 }]])\n" } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_simple_pattern_agg_push.json b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_simple_pattern_agg_push.json index 0f0830c05e5..73affef775a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_simple_pattern_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_patterns_simple_pattern_agg_push.json @@ -1,6 +1,6 @@ { "calcite": { - "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(pattern_count=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'pattern'))], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))])\n LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n LogicalProject(email=[$9], patterns_field=[REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>')], $f18=[10])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", - "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], pattern_count=[$t1], patterns_field=[$t6], tokens=[$t9])\n EnumerableAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=['[a-zA-Z0-9]+':VARCHAR], expr#18=['<*>'], expr#19=[REGEXP_REPLACE($t9, $t17, $t18)], expr#20=[10], email=[$t9], patterns_field=[$t19], $f18=[$t20])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" + "logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'pattern'))], pattern_count=[$1], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))], sample_logs=[$2])\n LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n LogicalProject(email=[$9], patterns_field=[REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>')], $f18=[10])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", + "physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], patterns_field=[$t6], pattern_count=[$t1], tokens=[$t9], sample_logs=[$t2])\n EnumerableAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=['[a-zA-Z0-9]+':VARCHAR], expr#18=['<*>'], expr#19=[REGEXP_REPLACE($t9, $t17, $t18)], expr#20=[10], email=[$t9], patterns_field=[$t19], $f18=[$t20])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n" } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java index c7bd64d5cab..467dd36f548 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java @@ -150,8 +150,9 @@ public void testPatternsAggregationModeForSimplePatternMethod() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(pattern_count=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2)," - + " 'pattern'))], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))])\n" + "LogicalProject(patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'pattern'))]," + + " pattern_count=[$1], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))]," + + " sample_logs=[$2])\n" + " LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0," + " $2)])\n" + " LogicalProject(ENAME=[$1], patterns_field=[REGEXP_REPLACE($1," @@ -160,11 +161,12 @@ public void testPatternsAggregationModeForSimplePatternMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT COUNT(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')) `pattern_count`," - + " SAFE_CAST(`PATTERN_PARSER`(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')," + "SELECT SAFE_CAST(`PATTERN_PARSER`(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')," + " `TAKE`(`ENAME`, 10))['pattern'] AS STRING) `patterns_field`," + + " COUNT(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')) `pattern_count`," + " SAFE_CAST(`PATTERN_PARSER`(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')," - + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`," + + " `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -176,9 +178,9 @@ public void testPatternsAggregationModeWithGroupByForSimplePatternMethod() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(DEPTNO=[$0], pattern_count=[$2]," - + " patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($1, $3), 'pattern'))]," - + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($1, $3), 'tokens'))])\n" + "LogicalProject(DEPTNO=[$0], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($1, $3)," + + " 'pattern'))], pattern_count=[$2], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($1, $3)," + + " 'tokens'))], sample_logs=[$3])\n" + " LogicalAggregate(group=[{1, 2}], pattern_count=[COUNT($2)], sample_logs=[TAKE($0," + " $3)])\n" + " LogicalProject(ENAME=[$1], DEPTNO=[$7], patterns_field=[REGEXP_REPLACE($1," @@ -187,11 +189,12 @@ public void testPatternsAggregationModeWithGroupByForSimplePatternMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `DEPTNO`, COUNT(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')) `pattern_count`," - + " SAFE_CAST(`PATTERN_PARSER`(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')," - + " `TAKE`(`ENAME`, 10))['pattern'] AS STRING) `patterns_field`," + "SELECT `DEPTNO`, SAFE_CAST(`PATTERN_PARSER`(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+'," + + " '<*>'), `TAKE`(`ENAME`, 10))['pattern'] AS STRING) `patterns_field`," + + " COUNT(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')) `pattern_count`," + " SAFE_CAST(`PATTERN_PARSER`(REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')," - + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`," + + " `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`, REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>')"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -205,7 +208,7 @@ public void testPatternsAggregationModeForBrainMethod() { String expectedLogical = "LogicalProject(patterns_field=[SAFE_CAST(ITEM($1, 'pattern'))]," + " pattern_count=[SAFE_CAST(ITEM($1, 'pattern_count'))], tokens=[SAFE_CAST(ITEM($1," - + " 'tokens'))])\n" + + " 'tokens'))], sample_logs=[SAFE_CAST(ITEM($1, 'sample_logs'))])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}])\n" + " LogicalAggregate(group=[{}], patterns_field=[pattern($0, $1, $2)])\n" + " LogicalProject(ENAME=[$1], $f8=[10], $f9=[100000])\n" @@ -223,7 +226,8 @@ public void testPatternsAggregationModeForBrainMethod() { "SELECT SAFE_CAST(`t20`.`patterns_field`['pattern'] AS STRING) `patterns_field`," + " SAFE_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT) `pattern_count`," + " SAFE_CAST(`t20`.`patterns_field`['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" - + " `tokens`\n" + + " `tokens`, SAFE_CAST(`t20`.`patterns_field`['sample_logs'] AS VARCHAR ARRAY)" + + " `sample_logs`\n" + "FROM (SELECT `pattern`(`ENAME`, 10, 100000) `patterns_field`\n" + "FROM `scott`.`EMP`) `$cor0`,\n" + "LATERAL UNNEST (SELECT `$cor0`.`patterns_field`\n" @@ -239,7 +243,7 @@ public void testPatternsAggregationModeWithGroupByForBrainMethod() { String expectedLogical = "LogicalProject(DEPTNO=[$0], patterns_field=[SAFE_CAST(ITEM($2, 'pattern'))]," + " pattern_count=[SAFE_CAST(ITEM($2, 'pattern_count'))], tokens=[SAFE_CAST(ITEM($2," - + " 'tokens'))])\n" + + " 'tokens'))], sample_logs=[SAFE_CAST(ITEM($2, 'sample_logs'))])\n" + " LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}])\n" + " LogicalAggregate(group=[{1}], patterns_field=[pattern($0, $2, $3)])\n" + " LogicalProject(ENAME=[$1], DEPTNO=[$7], $f8=[10], $f9=[100000])\n" @@ -257,7 +261,8 @@ public void testPatternsAggregationModeWithGroupByForBrainMethod() { "SELECT `$cor0`.`DEPTNO`, SAFE_CAST(`t20`.`patterns_field`['pattern'] AS STRING)" + " `patterns_field`, SAFE_CAST(`t20`.`patterns_field`['pattern_count'] AS BIGINT)" + " `pattern_count`, SAFE_CAST(`t20`.`patterns_field`['tokens'] AS MAP< VARCHAR," - + " VARCHAR ARRAY >) `tokens`\n" + + " VARCHAR ARRAY >) `tokens`, SAFE_CAST(`t20`.`patterns_field`['sample_logs'] AS" + + " VARCHAR ARRAY) `sample_logs`\n" + "FROM (SELECT `DEPTNO`, `pattern`(`ENAME`, 10, 100000) `patterns_field`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`) `$cor0`,\n"